Bug fix: closing the file
[codimension.git] / thirdparty / libantlr3c-3.2 / src / antlr3baserecognizer.c
blobafd29cbe67f7e1778302b47861b754ffd67fff9c
1 /** \file
2 * Contains the base functions that all recognizers require.
3 * Any function can be overridden by a lexer/parser/tree parser or by the
4 * ANTLR3 programmer.
5 *
6 * \addtogroup pANTLR3_BASE_RECOGNIZER
7 * @{
8 */
9 #include <antlr3baserecognizer.h>
11 // [The "BSD licence"]
12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13 // http://www.temporal-wave.com
14 // http://www.linkedin.com/in/jimidle
16 // All rights reserved.
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions
20 // are met:
21 // 1. Redistributions of source code must retain the above copyright
22 // notice, this list of conditions and the following disclaimer.
23 // 2. Redistributions in binary form must reproduce the above copyright
24 // notice, this list of conditions and the following disclaimer in the
25 // documentation and/or other materials provided with the distribution.
26 // 3. The name of the author may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #ifdef ANTLR3_WINDOWS
41 #pragma warning( disable : 4100 )
42 #endif
44 /* Interface functions -standard implementations cover parser and treeparser
45 * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
46 * most of these functions.
48 static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer);
49 static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer);
50 static void endResync (pANTLR3_BASE_RECOGNIZER recognizer);
51 static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
52 static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
54 static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
55 static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer);
56 static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
57 static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
58 static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
59 static void reportError (pANTLR3_BASE_RECOGNIZER recognizer);
60 static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer);
61 static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
62 static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
63 static void recover (pANTLR3_BASE_RECOGNIZER recognizer);
64 static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
65 static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
66 static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
67 static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
68 static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
69 static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer);
70 static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
71 static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
72 static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
73 static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
74 static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
75 static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
76 static void reset (pANTLR3_BASE_RECOGNIZER recognizer);
77 static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer);
78 static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
79 static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
80 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
81 static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer);
83 ANTLR3_API pANTLR3_BASE_RECOGNIZER
84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
86 pANTLR3_BASE_RECOGNIZER recognizer;
88 // Allocate memory for the structure
90 recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
92 if (recognizer == NULL)
94 // Allocation failed
96 return NULL;
100 // If we have been supplied with a pre-existing recognizer state
101 // then we just install it, otherwise we must create one from scratch
103 if (state == NULL)
105 recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
107 if (recognizer->state == NULL)
109 ANTLR3_FREE(recognizer);
110 return NULL;
113 // Initialize any new recognizer state
115 recognizer->state->errorRecovery = ANTLR3_FALSE;
116 recognizer->state->lastErrorIndex = -1;
117 recognizer->state->failed = ANTLR3_FALSE;
118 recognizer->state->errorCount = 0;
119 recognizer->state->backtracking = 0;
120 recognizer->state->following = NULL;
121 recognizer->state->ruleMemo = NULL;
122 recognizer->state->tokenNames = NULL;
123 recognizer->state->sizeHint = sizeHint;
124 recognizer->state->tokSource = NULL;
125 recognizer->state->tokFactory = NULL;
127 // Rather than check to see if we must initialize
128 // the stack every time we are asked for an new rewrite stream
129 // we just always create an empty stack and then just
130 // free it when the base recognizer is freed.
132 recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size.
134 if (recognizer->state->rStreams == NULL)
136 // Out of memory
138 ANTLR3_FREE(recognizer->state);
139 ANTLR3_FREE(recognizer);
140 return NULL;
143 else
145 // Install the one we were given, and do not reset it here
146 // as it will either already have been initialized or will
147 // be in a state that needs to be preserved.
149 recognizer->state = state;
152 // Install the BR API
154 recognizer->alreadyParsedRule = alreadyParsedRule;
155 recognizer->beginResync = beginResync;
156 recognizer->combineFollows = combineFollows;
157 recognizer->beginBacktrack = beginBacktrack;
158 recognizer->endBacktrack = endBacktrack;
159 recognizer->computeCSRuleFollow = computeCSRuleFollow;
160 recognizer->computeErrorRecoverySet = computeErrorRecoverySet;
161 recognizer->consumeUntil = consumeUntil;
162 recognizer->consumeUntilSet = consumeUntilSet;
163 recognizer->displayRecognitionError = displayRecognitionError;
164 recognizer->endResync = endResync;
165 recognizer->exConstruct = antlr3MTExceptionNew;
166 recognizer->getRuleInvocationStack = getRuleInvocationStack;
167 recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
168 recognizer->getRuleMemoization = getRuleMemoization;
169 recognizer->match = match;
170 recognizer->matchAny = matchAny;
171 recognizer->memoize = memoize;
172 recognizer->mismatch = mismatch;
173 recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken;
174 recognizer->mismatchIsMissingToken = mismatchIsMissingToken;
175 recognizer->recover = recover;
176 recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
177 recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet;
178 recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken;
179 recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors;
180 recognizer->reportError = reportError;
181 recognizer->reset = reset;
182 recognizer->synpred = synpred;
183 recognizer->toStrings = toStrings;
184 recognizer->getCurrentInputSymbol = getCurrentInputSymbol;
185 recognizer->getMissingSymbol = getMissingSymbol;
186 recognizer->debugger = NULL;
188 recognizer->free = freeBR;
190 /* Initialize variables
192 recognizer->type = type;
194 recognizer->userData = NULL;
196 return recognizer;
198 static void
199 freeBR (pANTLR3_BASE_RECOGNIZER recognizer)
201 pANTLR3_EXCEPTION thisE;
203 // Did we have a state allocated?
205 if (recognizer->state != NULL)
207 // Free any rule memoization we set up
209 if (recognizer->state->ruleMemo != NULL)
211 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
212 recognizer->state->ruleMemo = NULL;
215 // Free any exception space we have left around
217 thisE = recognizer->state->exception;
218 if (thisE != NULL)
220 thisE->freeEx(thisE);
223 // Free any rewrite streams we have allocated
225 if (recognizer->state->rStreams != NULL)
227 vectorFree(recognizer->state->rStreams);
230 // Free up any token factory we created (error recovery for instance)
232 if (recognizer->state->tokFactory != NULL)
234 recognizer->state->tokFactory->close(recognizer->state->tokFactory);
236 // Free the shared state memory
238 ANTLR3_FREE(recognizer->state);
241 // Free the actual recognizer space
243 ANTLR3_FREE(recognizer);
247 * Creates a new Mismatched Token Exception and inserts in the recognizer
248 * exception stack.
250 * \param recognizer
251 * Context pointer for this recognizer
254 ANTLR3_API void
255 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
257 /* Create a basic recognition exception structure
259 antlr3RecognitionExceptionNew(recognizer);
261 /* Now update it to indicate this is a Mismatched token exception
263 recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME;
264 recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
266 return;
269 ANTLR3_API void
270 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
272 pANTLR3_EXCEPTION ex;
273 pANTLR3_LEXER lexer;
274 pANTLR3_PARSER parser;
275 pANTLR3_TREE_PARSER tparser;
277 pANTLR3_INPUT_STREAM ins;
278 pANTLR3_INT_STREAM is;
279 pANTLR3_COMMON_TOKEN_STREAM cts;
280 pANTLR3_TREE_NODE_STREAM tns;
282 ins = NULL;
283 cts = NULL;
284 tns = NULL;
285 is = NULL;
286 lexer = NULL;
287 parser = NULL;
288 tparser = NULL;
290 switch (recognizer->type)
292 case ANTLR3_TYPE_LEXER:
294 lexer = (pANTLR3_LEXER) (recognizer->super);
295 ins = lexer->input;
296 is = ins->istream;
298 break;
300 case ANTLR3_TYPE_PARSER:
302 parser = (pANTLR3_PARSER) (recognizer->super);
303 cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
304 is = parser->tstream->istream;
306 break;
308 case ANTLR3_TYPE_TREE_PARSER:
310 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
311 tns = tparser->ctnstream->tnstream;
312 is = tns->istream;
314 break;
316 default:
318 ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
319 return;
321 break;
324 /* Create a basic exception structure
326 ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
327 (void *)ANTLR3_RECOGNITION_EX_NAME,
328 NULL,
329 ANTLR3_FALSE);
331 /* Rest of information depends on the base type of the
332 * input stream.
334 switch (is->type & ANTLR3_INPUT_MASK)
336 case ANTLR3_CHARSTREAM:
338 ex->c = is->_LA (is, 1); /* Current input character */
339 ex->line = ins->getLine (ins); /* Line number comes from stream */
340 ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */
341 ex->index = is->index (is);
342 ex->streamName = ins->fileName;
343 ex->message = "Unexpected character";
344 break;
346 case ANTLR3_TOKENSTREAM:
348 ex->token = tokLT (cts->tstream, 1); /* Current input token */
349 ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->line;
350 ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->charPosition;
351 ex->index = cts->tstream->istream->index (cts->tstream->istream);
352 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
354 ex->streamName = NULL;
356 else
358 ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
360 ex->message = "Unexpected token";
361 break;
363 case ANTLR3_COMMONTREENODE:
365 ex->token = tns->_LT (tns, 1); /* Current input tree node */
366 ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine (ex->token);
367 ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine (ex->token);
368 ex->index = tns->istream->index (tns->istream);
370 // Are you ready for this? Deep breath now...
373 pANTLR3_COMMON_TREE tnode;
375 tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
377 if (tnode->token == NULL)
379 ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
381 else
383 if (tnode->token->input == NULL)
385 ex->streamName = NULL;
387 else
389 ex->streamName = tnode->token->input->fileName;
392 ex->message = "Unexpected node";
394 break;
397 ex->input = is;
398 ex->nextException = recognizer->state->exception; /* So we don't leak the memory */
399 recognizer->state->exception = ex;
400 recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */
402 return;
406 /// Match current input symbol against ttype. Upon error, do one token
407 /// insertion or deletion if possible.
408 /// To turn off single token insertion or deletion error
409 /// recovery, override mismatchRecover() and have it call
410 /// plain mismatch(), which does not recover. Then any error
411 /// in a rule will cause an exception and immediate exit from
412 /// rule. Rule would recover by resynchronizing to the set of
413 /// symbols that can follow rule ref.
415 static void *
416 match( pANTLR3_BASE_RECOGNIZER recognizer,
417 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
419 pANTLR3_PARSER parser;
420 pANTLR3_TREE_PARSER tparser;
421 pANTLR3_INT_STREAM is;
422 void * matchedSymbol;
424 switch (recognizer->type)
426 case ANTLR3_TYPE_PARSER:
428 parser = (pANTLR3_PARSER) (recognizer->super);
429 tparser = NULL;
430 is = parser->tstream->istream;
432 break;
434 case ANTLR3_TYPE_TREE_PARSER:
436 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
437 parser = NULL;
438 is = tparser->ctnstream->tnstream->istream;
440 break;
442 default:
444 ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
445 return ANTLR3_FALSE;
447 break;
450 // Pick up the current input token/node for assignment to labels
452 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
454 if (is->_LA(is, 1) == ttype)
456 // The token was the one we were told to expect
458 is->consume(is); // Consume that token from the stream
459 recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were)
460 recognizer->state->failed = ANTLR3_FALSE; // The match was a success
461 return matchedSymbol; // We are done
464 // We did not find the expected token type, if we are backtracking then
465 // we just set the failed flag and return.
467 if (recognizer->state->backtracking > 0)
469 // Backtracking is going on
471 recognizer->state->failed = ANTLR3_TRUE;
472 return matchedSymbol;
475 // We did not find the expected token and there is no backtracking
476 // going on, so we mismatch, which creates an exception in the recognizer exception
477 // stack.
479 matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
480 return matchedSymbol;
483 /// Consumes the next token, whatever it is, and resets the recognizer state
484 /// so that it is not in error.
486 /// \param recognizer
487 /// Recognizer context pointer
489 static void
490 matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
492 pANTLR3_PARSER parser;
493 pANTLR3_TREE_PARSER tparser;
494 pANTLR3_INT_STREAM is;
496 switch (recognizer->type)
498 case ANTLR3_TYPE_PARSER:
500 parser = (pANTLR3_PARSER) (recognizer->super);
501 tparser = NULL;
502 is = parser->tstream->istream;
504 break;
506 case ANTLR3_TYPE_TREE_PARSER:
508 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
509 parser = NULL;
510 is = tparser->ctnstream->tnstream->istream;
512 break;
514 default:
516 ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
517 return;
519 break;
521 recognizer->state->errorRecovery = ANTLR3_FALSE;
522 recognizer->state->failed = ANTLR3_FALSE;
523 is->consume(is);
525 return;
529 static ANTLR3_BOOLEAN
530 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
532 ANTLR3_UINT32 nextt;
534 nextt = is->_LA(is, 2);
536 if (nextt == ttype)
538 if (recognizer->state->exception != NULL)
540 recognizer->state->exception->expecting = nextt;
542 return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted
544 else
546 return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted
552 static ANTLR3_BOOLEAN
553 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
555 ANTLR3_BOOLEAN retcode;
556 pANTLR3_BITSET followClone;
557 pANTLR3_BITSET viableTokensFollowingThisRule;
559 if (follow == NULL)
561 // There is no information about the tokens that can follow the last one
562 // hence we must say that the current one we found is not a member of the
563 // follow set and does not indicate a missing token. We will just consume this
564 // single token and see if the parser works it out from there.
566 return ANTLR3_FALSE;
569 followClone = NULL;
570 viableTokensFollowingThisRule = NULL;
572 // The C bitset maps are laid down at compile time by the
573 // C code generation. Hence we cannot remove things from them
574 // and so on. So, in order to remove EOR (if we need to) then
575 // we clone the static bitset.
577 followClone = antlr3BitsetLoad(follow);
578 if (followClone == NULL)
580 return ANTLR3_FALSE;
583 // Compute what can follow this grammar reference
585 if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
587 // EOR can follow, but if we are not the start symbol, we
588 // need to remove it.
590 if (recognizer->state->following->vector->count >= 0)
592 followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
595 // Now compute the visiable tokens that can follow this rule, according to context
596 // and make them part of the follow set.
598 viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
599 followClone->borInPlace(followClone, viableTokensFollowingThisRule);
602 /// if current token is consistent with what could come after set
603 /// then we know we're missing a token; error recovery is free to
604 /// "insert" the missing token
606 /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
607 /// in follow set to indicate that the fall of the start symbol is
608 /// in the set (EOF can follow).
610 if ( followClone->isMember(followClone, is->_LA(is, 1))
611 || followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
614 retcode = ANTLR3_TRUE;
616 else
618 retcode = ANTLR3_FALSE;
621 if (viableTokensFollowingThisRule != NULL)
623 viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
625 if (followClone != NULL)
627 followClone->free(followClone);
630 return retcode;
634 /// Factor out what to do upon token mismatch so tree parsers can behave
635 /// differently. Override and call mismatchRecover(input, ttype, follow)
636 /// to get single token insertion and deletion. Use this to turn off
637 /// single token insertion and deletion. Override mismatchRecover
638 /// to call this instead.
640 /// \remark mismatch only works for parsers and must be overridden for anything else.
642 static void
643 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
645 pANTLR3_PARSER parser;
646 pANTLR3_INT_STREAM is;
648 // Install a mismatched token exception in the exception stack
650 antlr3MTExceptionNew(recognizer);
651 recognizer->state->exception->expecting = ttype;
653 switch (recognizer->type)
655 case ANTLR3_TYPE_PARSER:
657 parser = (pANTLR3_PARSER) (recognizer->super);
658 is = parser->tstream->istream;
660 break;
662 default:
664 ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
665 return;
667 break;
670 if (mismatchIsUnwantedToken(recognizer, is, ttype))
672 // Create a basic recognition exception structure
674 antlr3RecognitionExceptionNew(recognizer);
676 // Now update it to indicate this is an unwanted token exception
678 recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
679 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
681 return;
684 if (mismatchIsMissingToken(recognizer, is, follow))
686 // Create a basic recognition exception structure
688 antlr3RecognitionExceptionNew(recognizer);
690 // Now update it to indicate this is an unwanted token exception
692 recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
693 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
695 return;
698 // Just a mismatched token is all we can dtermine
700 antlr3MTExceptionNew(recognizer);
702 return;
704 /// Report a recognition problem.
706 /// This method sets errorRecovery to indicate the parser is recovering
707 /// not parsing. Once in recovery mode, no errors are generated.
708 /// To get out of recovery mode, the parser must successfully match
709 /// a token (after a resync). So it will go:
711 /// 1. error occurs
712 /// 2. enter recovery mode, report error
713 /// 3. consume until token found in resynch set
714 /// 4. try to resume parsing
715 /// 5. next match() will reset errorRecovery mode
717 /// If you override, make sure to update errorCount if you care about that.
719 static void
720 reportError (pANTLR3_BASE_RECOGNIZER recognizer)
722 if (recognizer->state->errorRecovery == ANTLR3_TRUE)
724 // Already in error recovery so don't display another error while doing so
726 return;
729 // Signal we are in error recovery now
731 recognizer->state->errorRecovery = ANTLR3_TRUE;
733 // Indicate this recognizer had an error while processing.
735 recognizer->state->errorCount++;
737 // Call the error display routine
739 recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
742 static void
743 beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
745 if (recognizer->debugger != NULL)
747 recognizer->debugger->beginBacktrack(recognizer->debugger, level);
751 static void
752 endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
754 if (recognizer->debugger != NULL)
756 recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
759 static void
760 beginResync (pANTLR3_BASE_RECOGNIZER recognizer)
762 if (recognizer->debugger != NULL)
764 recognizer->debugger->beginResync(recognizer->debugger);
768 static void
769 endResync (pANTLR3_BASE_RECOGNIZER recognizer)
771 if (recognizer->debugger != NULL)
773 recognizer->debugger->endResync(recognizer->debugger);
777 /// Compute the error recovery set for the current rule.
778 /// Documentation below is from the Java implementation.
780 /// During rule invocation, the parser pushes the set of tokens that can
781 /// follow that rule reference on the stack; this amounts to
782 /// computing FIRST of what follows the rule reference in the
783 /// enclosing rule. This local follow set only includes tokens
784 /// from within the rule; i.e., the FIRST computation done by
785 /// ANTLR stops at the end of a rule.
787 /// EXAMPLE
789 /// When you find a "no viable alt exception", the input is not
790 /// consistent with any of the alternatives for rule r. The best
791 /// thing to do is to consume tokens until you see something that
792 /// can legally follow a call to r *or* any rule that called r.
793 /// You don't want the exact set of viable next tokens because the
794 /// input might just be missing a token--you might consume the
795 /// rest of the input looking for one of the missing tokens.
797 /// Consider grammar:
799 /// a : '[' b ']'
800 /// | '(' b ')'
801 /// ;
802 /// b : c '^' INT ;
803 /// c : ID
804 /// | INT
805 /// ;
807 /// At each rule invocation, the set of tokens that could follow
808 /// that rule is pushed on a stack. Here are the various "local"
809 /// follow sets:
811 /// FOLLOW(b1_in_a) = FIRST(']') = ']'
812 /// FOLLOW(b2_in_a) = FIRST(')') = ')'
813 /// FOLLOW(c_in_b) = FIRST('^') = '^'
815 /// Upon erroneous input "[]", the call chain is
817 /// a -> b -> c
819 /// and, hence, the follow context stack is:
821 /// depth local follow set after call to rule
822 /// 0 <EOF> a (from main())
823 /// 1 ']' b
824 /// 2 '^' c
826 /// Notice that ')' is not included, because b would have to have
827 /// been called from a different context in rule a for ')' to be
828 /// included.
830 /// For error recovery, we cannot consider FOLLOW(c)
831 /// (context-sensitive or otherwise). We need the combined set of
832 /// all context-sensitive FOLLOW sets--the set of all tokens that
833 /// could follow any reference in the call chain. We need to
834 /// resync to one of those tokens. Note that FOLLOW(c)='^' and if
835 /// we resync'd to that token, we'd consume until EOF. We need to
836 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
837 /// In this case, for input "[]", LA(1) is in this set so we would
838 /// not consume anything and after printing an error rule c would
839 /// return normally. It would not find the required '^' though.
840 /// At this point, it gets a mismatched token error and throws an
841 /// exception (since LA(1) is not in the viable following token
842 /// set). The rule exception handler tries to recover, but finds
843 /// the same recovery set and doesn't consume anything. Rule b
844 /// exits normally returning to rule a. Now it finds the ']' (and
845 /// with the successful match exits errorRecovery mode).
847 /// So, you can see that the parser walks up call chain looking
848 /// for the token that was a member of the recovery set.
850 /// Errors are not generated in errorRecovery mode.
852 /// ANTLR's error recovery mechanism is based upon original ideas:
854 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
856 /// and
858 /// "A note on error recovery in recursive descent parsers":
859 /// http://portal.acm.org/citation.cfm?id=947902.947905
861 /// Later, Josef Grosch had some good ideas:
863 /// "Efficient and Comfortable Error Recovery in Recursive Descent
864 /// Parsers":
865 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
867 /// Like Grosch I implemented local FOLLOW sets that are combined
868 /// at run-time upon error to avoid overhead during parsing.
870 static pANTLR3_BITSET
871 computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer)
873 return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
876 /// Compute the context-sensitive FOLLOW set for current rule.
877 /// Documentation below is from the Java runtime.
879 /// This is the set of token types that can follow a specific rule
880 /// reference given a specific call chain. You get the set of
881 /// viable tokens that can possibly come next (look ahead depth 1)
882 /// given the current call chain. Contrast this with the
883 /// definition of plain FOLLOW for rule r:
885 /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
887 /// where x in T* and alpha, beta in V*; T is set of terminals and
888 /// V is the set of terminals and non terminals. In other words,
889 /// FOLLOW(r) is the set of all tokens that can possibly follow
890 /// references to r in *any* sentential form (context). At
891 /// runtime, however, we know precisely which context applies as
892 /// we have the call chain. We may compute the exact (rather
893 /// than covering superset) set of following tokens.
895 /// For example, consider grammar:
897 /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
898 /// | "return" expr '.'
899 /// ;
900 /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
901 /// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
902 /// | '(' expr ')'
903 /// ;
905 /// The FOLLOW sets are all inclusive whereas context-sensitive
906 /// FOLLOW sets are precisely what could follow a rule reference.
907 /// For input "i=(3);", here is the derivation:
909 /// stat => ID '=' expr ';'
910 /// => ID '=' atom ('+' atom)* ';'
911 /// => ID '=' '(' expr ')' ('+' atom)* ';'
912 /// => ID '=' '(' atom ')' ('+' atom)* ';'
913 /// => ID '=' '(' INT ')' ('+' atom)* ';'
914 /// => ID '=' '(' INT ')' ';'
916 /// At the "3" token, you'd have a call chain of
918 /// stat -> expr -> atom -> expr -> atom
920 /// What can follow that specific nested ref to atom? Exactly ')'
921 /// as you can see by looking at the derivation of this specific
922 /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
924 /// You want the exact viable token set when recovering from a
925 /// token mismatch. Upon token mismatch, if LA(1) is member of
926 /// the viable next token set, then you know there is most likely
927 /// a missing token in the input stream. "Insert" one by just not
928 /// throwing an exception.
930 static pANTLR3_BITSET
931 computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer)
933 return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
936 /// Compute the current followset for the input stream.
938 static pANTLR3_BITSET
939 combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
941 pANTLR3_BITSET followSet;
942 pANTLR3_BITSET localFollowSet;
943 ANTLR3_UINT32 top;
944 ANTLR3_UINT32 i;
946 top = recognizer->state->following->vector->count;
948 followSet = antlr3BitsetNew(0);
949 localFollowSet = NULL;
951 for (i = top; i>0; i--)
953 localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) stackGet(recognizer->state->following, i-1));
955 if (localFollowSet != NULL)
957 followSet->borInPlace(followSet, localFollowSet);
959 if (exact == ANTLR3_TRUE)
961 if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
963 // Only leave EOR in the set if at top (start rule); this lets us know
964 // if we have to include the follow(start rule); I.E., EOF
966 if (i>1)
968 followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
971 else
973 break; // Cannot see End Of Rule from here, just drop out
976 localFollowSet->free(localFollowSet);
977 localFollowSet = NULL;
981 if (localFollowSet != NULL)
983 localFollowSet->free(localFollowSet);
985 return followSet;
988 /// Standard/Example error display method.
989 /// No generic error message display function could possibly do everything correctly
990 /// for all possible parsers. Hence you are provided with this example routine, which
991 /// you should override in your parser/tree parser to do as you will.
993 /// Here we depart somewhat from the Java runtime as that has now split up a lot
994 /// of the error display routines into spearate units. However, ther is little advantage
995 /// to this in the C version as you will probably implement all such routines as a
996 /// separate translation unit, rather than install them all as pointers to functions
997 /// in the base recognizer.
999 static void
1000 displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1002 pANTLR3_STRING ttext;
1003 pANTLR3_STRING ftext;
1004 pANTLR3_EXCEPTION ex;
1005 pANTLR3_COMMON_TOKEN theToken;
1006 pANTLR3_BASE_TREE theBaseTree;
1007 pANTLR3_COMMON_TREE theCommonTree;
1009 // Retrieve some info for easy reading.
1011 ex = recognizer->state->exception;
1012 ttext = NULL;
1014 // See if there is a 'filename' we can use
1016 if (ex->streamName == NULL)
1018 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1020 ANTLR3_FPRINTF(stderr, "-end of input-(");
1022 else
1024 ANTLR3_FPRINTF(stderr, "-unknown source-(");
1027 else
1029 ftext = ex->streamName->to8(ex->streamName);
1030 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1033 // Next comes the line number
1036 ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1037 ANTLR3_FPRINTF(stderr, " : error %d : %s",
1038 recognizer->state->exception->type,
1039 (pANTLR3_UINT8) (recognizer->state->exception->message));
1042 // How we determine the next piece is dependent on which thing raised the
1043 // error.
1045 switch (recognizer->type)
1047 case ANTLR3_TYPE_PARSER:
1049 // Prepare the knowledge we know we have
1051 theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1052 ttext = theToken->toString(theToken);
1054 ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1055 if (theToken != NULL)
1057 if (theToken->type == ANTLR3_TOKEN_EOF)
1059 ANTLR3_FPRINTF(stderr, ", at <EOF>");
1061 else
1063 // Guard against null text in a token
1065 ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1068 break;
1070 case ANTLR3_TYPE_TREE_PARSER:
1072 theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1073 ttext = theBaseTree->toStringTree(theBaseTree);
1075 if (theBaseTree != NULL)
1077 theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super;
1079 if (theCommonTree != NULL)
1081 theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree);
1083 ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1084 ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1086 break;
1088 default:
1090 ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1091 return;
1092 break;
1095 // Although this function should generally be provided by the implementation, this one
1096 // should be as helpful as possible for grammar developers and serve as an example
1097 // of what you can do with each exception type. In general, when you make up your
1098 // 'real' handler, you should debug the routine with all possible errors you expect
1099 // which will then let you be as specific as possible about all circumstances.
1101 // Note that in the general case, errors thrown by tree parsers indicate a problem
1102 // with the output of the parser or with the tree grammar itself. The job of the parser
1103 // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1104 // at that stage should really be semantic errors that your own code determines and handles
1105 // in whatever way is appropriate.
1107 switch (ex->type)
1109 case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1111 // Indicates that the recognizer was fed a token which seesm to be
1112 // spurious input. We can detect this when the token that follows
1113 // this unwanted token would normally be part of the syntactically
1114 // correct stream. Then we can see that the token we are looking at
1115 // is just something that should not be there and throw this exception.
1117 if (tokenNames == NULL)
1119 ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1121 else
1123 if (ex->expecting == ANTLR3_TOKEN_EOF)
1125 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1127 else
1129 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1132 break;
1134 case ANTLR3_MISSING_TOKEN_EXCEPTION:
1136 // Indicates that the recognizer detected that the token we just
1137 // hit would be valid syntactically if preceeded by a particular
1138 // token. Perhaps a missing ';' at line end or a missing ',' in an
1139 // expression list, and such like.
1141 if (tokenNames == NULL)
1143 ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1145 else
1147 if (ex->expecting == ANTLR3_TOKEN_EOF)
1149 ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1151 else
1153 ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1156 break;
1158 case ANTLR3_RECOGNITION_EXCEPTION:
1160 // Indicates that the recognizer received a token
1161 // in the input that was not predicted. This is the basic exception type
1162 // from which all others are derived. So we assume it was a syntax error.
1163 // You may get this if there are not more tokens and more are needed
1164 // to complete a parse for instance.
1166 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1167 break;
1169 case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1171 // We were expecting to see one thing and got another. This is the
1172 // most common error if we coudl not detect a missing or unwanted token.
1173 // Here you can spend your efforts to
1174 // derive more useful error messages based on the expected
1175 // token set and the last token and so on. The error following
1176 // bitmaps do a good job of reducing the set that we were looking
1177 // for down to something small. Knowing what you are parsing may be
1178 // able to allow you to be even more specific about an error.
1180 if (tokenNames == NULL)
1182 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1184 else
1186 if (ex->expecting == ANTLR3_TOKEN_EOF)
1188 ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1190 else
1192 ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1195 break;
1197 case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1199 // We could not pick any alt decision from the input given
1200 // so god knows what happened - however when you examine your grammar,
1201 // you should. It means that at the point where the current token occurred
1202 // that the DFA indicates nowhere to go from here.
1204 ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1206 break;
1208 case ANTLR3_MISMATCHED_SET_EXCEPTION:
1211 ANTLR3_UINT32 count;
1212 ANTLR3_UINT32 bit;
1213 ANTLR3_UINT32 size;
1214 ANTLR3_UINT32 numbits;
1215 pANTLR3_BITSET errBits;
1217 // This means we were able to deal with one of a set of
1218 // possible tokens at this point, but we did not see any
1219 // member of that set.
1221 ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");
1223 // What tokens could we have accepted at this point in the
1224 // parse?
1226 count = 0;
1227 errBits = antlr3BitsetLoad (ex->expectingSet);
1228 numbits = errBits->numBits (errBits);
1229 size = errBits->size (errBits);
1231 if (size > 0)
1233 // However many tokens we could have dealt with here, it is usually
1234 // not useful to print ALL of the set here. I arbitrarily chose 8
1235 // here, but you should do whatever makes sense for you of course.
1236 // No token number 0, so look for bit 1 and on.
1238 for (bit = 1; bit < numbits && count < 8 && count < size; bit++)
1240 // TODO: This doesn;t look right - should be asking if the bit is set!!
1242 if (tokenNames[bit])
1244 ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
1245 count++;
1248 ANTLR3_FPRINTF(stderr, "\n");
1250 else
1252 ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1253 ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1256 break;
1258 case ANTLR3_EARLY_EXIT_EXCEPTION:
1260 // We entered a loop requiring a number of token sequences
1261 // but found a token that ended that sequence earlier than
1262 // we should have done.
1264 ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1265 break;
1267 default:
1269 // We don't handle any other exceptions here, but you can
1270 // if you wish. If we get an exception that hits this point
1271 // then we are just going to report what we know about the
1272 // token.
1274 ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1275 break;
1278 // Here you have the token that was in error which if this is
1279 // the standard implementation will tell you the line and offset
1280 // and also record the address of the start of the line in the
1281 // input stream. You could therefore print the source line and so on.
1282 // Generally though, I would expect that your lexer/parser will keep
1283 // its own map of lines and source pointers or whatever as there
1284 // are a lot of specific things you need to know about the input
1285 // to do something like that.
1286 // Here is where you do it though :-).
1290 /// Return how many syntax errors were detected by this recognizer
1292 static ANTLR3_UINT32
1293 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1295 return recognizer->state->errorCount;
1298 /// Recover from an error found on the input stream. Mostly this is
1299 /// NoViableAlt exceptions, but could be a mismatched token that
1300 /// the match() routine could not recover from.
1302 static void
1303 recover (pANTLR3_BASE_RECOGNIZER recognizer)
1305 // Used to compute the follow set of tokens
1307 pANTLR3_BITSET followSet;
1308 pANTLR3_PARSER parser;
1309 pANTLR3_TREE_PARSER tparser;
1310 pANTLR3_INT_STREAM is;
1312 switch (recognizer->type)
1314 case ANTLR3_TYPE_PARSER:
1316 parser = (pANTLR3_PARSER) (recognizer->super);
1317 tparser = NULL;
1318 is = parser->tstream->istream;
1320 break;
1322 case ANTLR3_TYPE_TREE_PARSER:
1324 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1325 parser = NULL;
1326 is = tparser->ctnstream->tnstream->istream;
1328 break;
1330 default:
1332 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1333 return;
1335 break;
1338 // Are we about to repeat the same error?
1340 if (recognizer->state->lastErrorIndex == is->index(is))
1342 // The last error was at the same token index point. This must be a case
1343 // where LT(1) is in the recovery token set so nothing is
1344 // consumed. Consume a single token so at least to prevent
1345 // an infinite loop; this is a failsafe.
1347 is->consume(is);
1350 // Record error index position
1352 recognizer->state->lastErrorIndex = is->index(is);
1354 // Work out the follows set for error recovery
1356 followSet = recognizer->computeErrorRecoverySet(recognizer);
1358 // Call resync hook (for debuggers and so on)
1360 recognizer->beginResync(recognizer);
1362 // Consume tokens until we have resynced to something in the follows set
1364 recognizer->consumeUntilSet(recognizer, followSet);
1366 // End resync hook
1368 recognizer->endResync(recognizer);
1370 // Destroy the temporary bitset we produced.
1372 followSet->free(followSet);
1374 // Reset the inError flag so we don't re-report the exception
1376 recognizer->state->error = ANTLR3_FALSE;
1377 recognizer->state->failed = ANTLR3_FALSE;
1381 /// Attempt to recover from a single missing or extra token.
1383 /// EXTRA TOKEN
1385 /// LA(1) is not what we are looking for. If LA(2) has the right token,
1386 /// however, then assume LA(1) is some extra spurious token. Delete it
1387 /// and LA(2) as if we were doing a normal match(), which advances the
1388 /// input.
1390 /// MISSING TOKEN
1392 /// If current token is consistent with what could come after
1393 /// ttype then it is ok to "insert" the missing token, else throw
1394 /// exception For example, Input "i=(3;" is clearly missing the
1395 /// ')'. When the parser returns from the nested call to expr, it
1396 /// will have call chain:
1398 /// stat -> expr -> atom
1400 /// and it will be trying to match the ')' at this point in the
1401 /// derivation:
1403 /// => ID '=' '(' INT ')' ('+' atom)* ';'
1404 /// ^
1405 /// match() will see that ';' doesn't match ')' and report a
1406 /// mismatched token error. To recover, it sees that LA(1)==';'
1407 /// is in the set of tokens that can follow the ')' token
1408 /// reference in rule atom. It can assume that you forgot the ')'.
1410 /// The exception that was passed in, in the java implementation is
1411 /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
1412 /// error flag and rules cascade back when this is set.
1414 static void *
1415 recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1417 pANTLR3_PARSER parser;
1418 pANTLR3_TREE_PARSER tparser;
1419 pANTLR3_INT_STREAM is;
1420 void * matchedSymbol;
1422 // Invoke the debugger event if there is a debugger listening to us
1424 if (recognizer->debugger != NULL)
1426 recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
1429 switch (recognizer->type)
1431 case ANTLR3_TYPE_PARSER:
1433 parser = (pANTLR3_PARSER) (recognizer->super);
1434 tparser = NULL;
1435 is = parser->tstream->istream;
1437 break;
1439 case ANTLR3_TYPE_TREE_PARSER:
1441 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1442 parser = NULL;
1443 is = tparser->ctnstream->tnstream->istream;
1445 break;
1447 default:
1449 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1450 return NULL;
1452 break;
1455 // Create an exception if we need one
1457 if (recognizer->state->exception == NULL)
1459 antlr3RecognitionExceptionNew(recognizer);
1462 // If the next token after the one we are looking at in the input stream
1463 // is what we are looking for then we remove the one we have discovered
1464 // from the stream by consuming it, then consume this next one along too as
1465 // if nothing had happened.
1467 if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1469 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1470 recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1472 // Call resync hook (for debuggers and so on)
1474 if (recognizer->debugger != NULL)
1476 recognizer->debugger->beginResync(recognizer->debugger);
1479 recognizer->beginResync(recognizer);
1481 // "delete" the extra token
1483 recognizer->beginResync(recognizer);
1484 is->consume(is);
1485 recognizer->endResync(recognizer);
1486 // End resync hook
1488 if (recognizer->debugger != NULL)
1490 recognizer->debugger->endResync(recognizer->debugger);
1493 // Print out the error after we consume so that ANTLRWorks sees the
1494 // token in the exception.
1496 recognizer->reportError(recognizer);
1498 // Return the token we are actually matching
1500 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1502 // Consume the token that the rule actually expected to get as if everything
1503 // was hunky dory.
1505 is->consume(is);
1507 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1509 return matchedSymbol;
1512 // Single token deletion (Unwanted above) did not work
1513 // so we see if we can insert a token instead by calculating which
1514 // token would be missing
1516 if (mismatchIsMissingToken(recognizer, is, follow))
1518 // We can fake the missing token and proceed
1520 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1521 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
1522 recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1523 recognizer->state->exception->token = matchedSymbol;
1524 recognizer->state->exception->expecting = ttype;
1526 // Print out the error after we insert so that ANTLRWorks sees the
1527 // token in the exception.
1529 recognizer->reportError(recognizer);
1531 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1533 return matchedSymbol;
1537 // Neither deleting nor inserting tokens allows recovery
1538 // must just report the exception.
1540 recognizer->state->error = ANTLR3_TRUE;
1541 return NULL;
1544 static void *
1545 recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1547 pANTLR3_PARSER parser;
1548 pANTLR3_TREE_PARSER tparser;
1549 pANTLR3_INT_STREAM is;
1550 pANTLR3_COMMON_TOKEN matchedSymbol;
1552 switch (recognizer->type)
1554 case ANTLR3_TYPE_PARSER:
1556 parser = (pANTLR3_PARSER) (recognizer->super);
1557 tparser = NULL;
1558 is = parser->tstream->istream;
1560 break;
1562 case ANTLR3_TYPE_TREE_PARSER:
1564 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1565 parser = NULL;
1566 is = tparser->ctnstream->tnstream->istream;
1568 break;
1570 default:
1572 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1573 return NULL;
1575 break;
1578 if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1580 // We can fake the missing token and proceed
1582 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1583 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
1584 recognizer->state->exception->token = matchedSymbol;
1586 // Print out the error after we insert so that ANTLRWorks sees the
1587 // token in the exception.
1589 recognizer->reportError(recognizer);
1591 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1593 return matchedSymbol;
1596 // TODO - Single token deletion like in recoverFromMismatchedToken()
1598 recognizer->state->error = ANTLR3_TRUE;
1599 recognizer->state->failed = ANTLR3_TRUE;
1600 return NULL;
1603 /// This code is factored out from mismatched token and mismatched set
1604 /// recovery. It handles "single token insertion" error recovery for
1605 /// both. No tokens are consumed to recover from insertions. Return
1606 /// true if recovery was possible else return false.
1608 static ANTLR3_BOOLEAN
1609 recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1611 pANTLR3_BITSET viableToksFollowingRule;
1612 pANTLR3_BITSET follow;
1613 pANTLR3_PARSER parser;
1614 pANTLR3_TREE_PARSER tparser;
1615 pANTLR3_INT_STREAM is;
1617 switch (recognizer->type)
1619 case ANTLR3_TYPE_PARSER:
1621 parser = (pANTLR3_PARSER) (recognizer->super);
1622 tparser = NULL;
1623 is = parser->tstream->istream;
1625 break;
1627 case ANTLR3_TYPE_TREE_PARSER:
1629 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1630 parser = NULL;
1631 is = tparser->ctnstream->tnstream->istream;
1633 break;
1635 default:
1637 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1638 return ANTLR3_FALSE;
1640 break;
1643 follow = antlr3BitsetLoad(followBits);
1645 if (follow == NULL)
1647 /* The follow set is NULL, which means we don't know what can come
1648 * next, so we "hit and hope" by just signifying that we cannot
1649 * recover, which will just cause the next token to be consumed,
1650 * which might dig us out.
1652 return ANTLR3_FALSE;
1655 /* We have a bitmap for the follow set, hence we can compute
1656 * what can follow this grammar element reference.
1658 if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1660 /* First we need to know which of the available tokens are viable
1661 * to follow this reference.
1663 viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
1665 /* Remove the EOR token, which we do not wish to compute with
1667 follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1668 viableToksFollowingRule->free(viableToksFollowingRule);
1669 /* We now have the computed set of what can follow the current token
1673 /* We can now see if the current token works with the set of tokens
1674 * that could follow the current grammar reference. If it looks like it
1675 * is consistent, then we can "insert" that token by not throwing
1676 * an exception and assuming that we saw it.
1678 if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1680 /* report the error, but don't cause any rules to abort and stuff
1682 recognizer->reportError(recognizer);
1683 if (follow != NULL)
1685 follow->free(follow);
1687 recognizer->state->error = ANTLR3_FALSE;
1688 recognizer->state->failed = ANTLR3_FALSE;
1689 return ANTLR3_TRUE; /* Success in recovery */
1692 if (follow != NULL)
1694 follow->free(follow);
1697 /* We could not find anything viable to do, so this is going to
1698 * cause an exception.
1700 return ANTLR3_FALSE;
1703 /// Eat tokens from the input stream until we get one of JUST the right type
1705 static void
1706 consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1708 ANTLR3_UINT32 ttype;
1709 pANTLR3_PARSER parser;
1710 pANTLR3_TREE_PARSER tparser;
1711 pANTLR3_INT_STREAM is;
1713 switch (recognizer->type)
1715 case ANTLR3_TYPE_PARSER:
1717 parser = (pANTLR3_PARSER) (recognizer->super);
1718 tparser = NULL;
1719 is = parser->tstream->istream;
1721 break;
1723 case ANTLR3_TYPE_TREE_PARSER:
1725 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1726 parser = NULL;
1727 is = tparser->ctnstream->tnstream->istream;
1729 break;
1731 default:
1733 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1734 return;
1736 break;
1739 // What do have at the moment?
1741 ttype = is->_LA(is, 1);
1743 // Start eating tokens until we get to the one we want.
1745 while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1747 is->consume(is);
1748 ttype = is->_LA(is, 1);
1752 /// Eat tokens from the input stream until we find one that
1753 /// belongs to the supplied set.
1755 static void
1756 consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1758 ANTLR3_UINT32 ttype;
1759 pANTLR3_PARSER parser;
1760 pANTLR3_TREE_PARSER tparser;
1761 pANTLR3_INT_STREAM is;
1763 switch (recognizer->type)
1765 case ANTLR3_TYPE_PARSER:
1767 parser = (pANTLR3_PARSER) (recognizer->super);
1768 tparser = NULL;
1769 is = parser->tstream->istream;
1771 break;
1773 case ANTLR3_TYPE_TREE_PARSER:
1775 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1776 parser = NULL;
1777 is = tparser->ctnstream->tnstream->istream;
1779 break;
1781 default:
1783 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1784 return;
1786 break;
1789 // What do have at the moment?
1791 ttype = is->_LA(is, 1);
1793 // Start eating tokens until we get to one we want.
1795 while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1797 is->consume(is);
1798 ttype = is->_LA(is, 1);
1802 /** Return the rule invocation stack (how we got here in the parse.
1803 * In the java version Ter just asks the JVM for all the information
1804 * but in C we don't get this information, so I am going to do nothing
1805 * right now.
1807 static pANTLR3_STACK
1808 getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer)
1810 return NULL;
1813 static pANTLR3_STACK
1814 getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1816 return NULL;
1819 /** Convenience method for template rewrites - NYI.
1821 static pANTLR3_HASH_TABLE
1822 toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
1824 return NULL;
1827 static void ANTLR3_CDECL
1828 freeIntTrie (void * trie)
1830 ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1834 /** Pointer to a function to return whether the rule has parsed input starting at the supplied
1835 * start index before. If the rule has not parsed input starting from the supplied start index,
1836 * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
1837 * then it will return the point where it last stopped parsing after that start point.
1839 * \remark
1840 * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1841 * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1842 * version of the table.
1844 static ANTLR3_MARKER
1845 getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1847 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1849 pANTLR3_INT_TRIE ruleList;
1850 ANTLR3_MARKER stopIndex;
1851 pANTLR3_TRIE_ENTRY entry;
1853 /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1854 * as we will need it eventually if we are being asked for the memo here.
1856 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1858 if (entry == NULL)
1860 /* Did not find it, so create a new one for it, with a bit depth based on the
1861 * size of the input stream. We need the bit depth to incorporate the number if
1862 * bits required to represent the largest possible stop index in the input, which is the
1863 * last character. An int stream is free to return the largest 64 bit offset if it has
1864 * no idea of the size, but you should remember that this will cause the leftmost
1865 * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1867 ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */
1869 if (ruleList != NULL)
1871 recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1874 /* We cannot have a stopIndex in a trie we have just created of course
1876 return MEMO_RULE_UNKNOWN;
1879 ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr);
1881 /* See if there is a stop index associated with the supplied start index.
1883 stopIndex = 0;
1885 entry = ruleList->get(ruleList, ruleParseStart);
1886 if (entry != NULL)
1888 stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1891 if (stopIndex == 0)
1893 return MEMO_RULE_UNKNOWN;
1896 return stopIndex;
1899 /** Has this rule already parsed input at the current index in the
1900 * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1901 * if we have not.
1903 * This method has a side-effect: if we have seen this input for
1904 * this rule and successfully parsed before, then seek ahead to
1905 * 1 past the stop token matched for this rule last time.
1907 static ANTLR3_BOOLEAN
1908 alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1910 ANTLR3_MARKER stopIndex;
1911 pANTLR3_LEXER lexer;
1912 pANTLR3_PARSER parser;
1913 pANTLR3_TREE_PARSER tparser;
1914 pANTLR3_INT_STREAM is;
1916 switch (recognizer->type)
1918 case ANTLR3_TYPE_PARSER:
1920 parser = (pANTLR3_PARSER) (recognizer->super);
1921 tparser = NULL;
1922 lexer = NULL;
1923 is = parser->tstream->istream;
1925 break;
1927 case ANTLR3_TYPE_TREE_PARSER:
1929 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1930 parser = NULL;
1931 lexer = NULL;
1932 is = tparser->ctnstream->tnstream->istream;
1934 break;
1936 case ANTLR3_TYPE_LEXER:
1938 lexer = (pANTLR3_LEXER) (recognizer->super);
1939 parser = NULL;
1940 tparser = NULL;
1941 is = lexer->input->istream;
1942 break;
1944 default:
1946 ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1947 return ANTLR3_FALSE;
1949 break;
1952 /* See if we have a memo marker for this.
1954 stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
1956 if (stopIndex == MEMO_RULE_UNKNOWN)
1958 return ANTLR3_FALSE;
1961 if (stopIndex == MEMO_RULE_FAILED)
1963 recognizer->state->failed = ANTLR3_TRUE;
1965 else
1967 is->seek(is, stopIndex+1);
1970 /* If here then the rule was executed for this input already
1972 return ANTLR3_TRUE;
1975 /** Record whether or not this rule parsed the input at this position
1976 * successfully.
1978 static void
1979 memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1981 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1983 pANTLR3_INT_TRIE ruleList;
1984 pANTLR3_TRIE_ENTRY entry;
1985 ANTLR3_MARKER stopIndex;
1986 pANTLR3_LEXER lexer;
1987 pANTLR3_PARSER parser;
1988 pANTLR3_TREE_PARSER tparser;
1989 pANTLR3_INT_STREAM is;
1991 switch (recognizer->type)
1993 case ANTLR3_TYPE_PARSER:
1995 parser = (pANTLR3_PARSER) (recognizer->super);
1996 tparser = NULL;
1997 is = parser->tstream->istream;
1999 break;
2001 case ANTLR3_TYPE_TREE_PARSER:
2003 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2004 parser = NULL;
2005 is = tparser->ctnstream->tnstream->istream;
2007 break;
2009 case ANTLR3_TYPE_LEXER:
2011 lexer = (pANTLR3_LEXER) (recognizer->super);
2012 parser = NULL;
2013 tparser = NULL;
2014 is = lexer->input->istream;
2015 break;
2017 default:
2019 ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2020 return;
2022 break;
2025 stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2027 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2029 if (entry != NULL)
2031 ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2033 /* If we don't already have this entry, append it. The memoize trie does not
2034 * accept duplicates so it won't add it if already there and we just ignore the
2035 * return code as we don't care if it is there already.
2037 ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2040 /** A syntactic predicate. Returns true/false depending on whether
2041 * the specified grammar fragment matches the current input stream.
2042 * This resets the failed instance var afterwards.
2044 static ANTLR3_BOOLEAN
2045 synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2047 ANTLR3_MARKER start;
2048 pANTLR3_PARSER parser;
2049 pANTLR3_TREE_PARSER tparser;
2050 pANTLR3_INT_STREAM is;
2052 switch (recognizer->type)
2054 case ANTLR3_TYPE_PARSER:
2056 parser = (pANTLR3_PARSER) (recognizer->super);
2057 tparser = NULL;
2058 is = parser->tstream->istream;
2060 break;
2062 case ANTLR3_TYPE_TREE_PARSER:
2064 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2065 parser = NULL;
2066 is = tparser->ctnstream->tnstream->istream;
2068 break;
2070 default:
2072 ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2073 return ANTLR3_FALSE;
2075 break;
2078 /* Begin backtracking so we can get back to where we started after trying out
2079 * the syntactic predicate.
2081 start = is->mark(is);
2082 recognizer->state->backtracking++;
2084 /* Try the syntactical predicate
2086 predicate(ctx);
2088 /* Reset
2090 is->rewind(is, start);
2091 recognizer->state->backtracking--;
2093 if (recognizer->state->failed == ANTLR3_TRUE)
2095 /* Predicate failed
2097 recognizer->state->failed = ANTLR3_FALSE;
2098 return ANTLR3_FALSE;
2100 else
2102 /* Predicate was successful
2104 recognizer->state->failed = ANTLR3_FALSE;
2105 return ANTLR3_TRUE;
2109 static void
2110 reset(pANTLR3_BASE_RECOGNIZER recognizer)
2112 if (recognizer->state->following != NULL)
2114 stackFree(recognizer->state->following);
2117 // Reset the state flags
2119 recognizer->state->errorRecovery = ANTLR3_FALSE;
2120 recognizer->state->lastErrorIndex = -1;
2121 recognizer->state->failed = ANTLR3_FALSE;
2122 recognizer->state->errorCount = 0;
2123 recognizer->state->backtracking = 0;
2124 recognizer->state->following = NULL;
2126 if (recognizer->state != NULL)
2128 if (recognizer->state->ruleMemo != NULL)
2130 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2131 recognizer->state->ruleMemo = antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */
2136 // Install a new following set
2138 recognizer->state->following = antlr3StackNew(8);
2142 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2143 // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
2145 static void *
2146 getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2148 return tokLT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2151 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2152 // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
2154 static void *
2155 getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
2156 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2158 pANTLR3_TOKEN_STREAM ts;
2159 pANTLR3_COMMON_TOKEN token;
2160 pANTLR3_COMMON_TOKEN current;
2161 pANTLR3_STRING text;
2163 // Dereference the standard pointers
2165 ts = (pANTLR3_TOKEN_STREAM)istream->super;
2167 // Work out what to use as the current symbol to make a line and offset etc
2168 // If we are at EOF, we use the token before EOF
2170 current = tokLT(ts, 1);
2171 if (current->type == ANTLR3_TOKEN_EOF)
2173 current = tokLT(ts, -1);
2176 // Create a new empty token
2178 if (recognizer->state->tokFactory == NULL)
2180 // We don't yet have a token factory for making tokens
2181 // we just need a fake one using the input stream of the current
2182 // token.
2184 recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2186 token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2188 // Set some of the token properties based on the current token
2190 token->line = current->line;
2191 token->charPosition = current->charPosition;
2192 token->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
2193 token->type = expectedTokenType;
2194 token->user1 = current->user1;
2195 token->user2 = current->user2;
2196 token->user3 = current->user3;
2197 token->custom = current->custom;
2198 token->lineStart = current->lineStart;
2200 // Create the token text that shows it has been inserted
2202 token->setText8(token, (pANTLR3_UINT8)"<missing ");
2203 text = token->getText(token);
2205 if (text != NULL)
2207 text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2208 text->append8(text, (const char *)">");
2211 // Finally return the pointer to our new token
2213 return token;
2217 #ifdef ANTLR3_WINDOWS
2218 #pragma warning( default : 4100 )
2219 #endif
2221 /// @}