thirdparty/libantlr3c-3.2/src/antlr3baserecognizer.c

   1 /** \file
   2  * Contains the base functions that all recognizers require.
   3  * Any function can be overridden by a lexer/parser/tree parser or by the
   4  * ANTLR3 programmer.
   5  *
   6  * \addtogroup pANTLR3_BASE_RECOGNIZER
   7  * @{
   8  */
   9 #include    <antlr3baserecognizer.h>
  10
  11 // [The "BSD licence"]
  12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
  13 // http://www.temporal-wave.com
  14 // http://www.linkedin.com/in/jimidle
  15 //
  16 // All rights reserved.
  17 //
  18 // Redistribution and use in source and binary forms, with or without
  19 // modification, are permitted provided that the following conditions
  20 // are met:
  21 // 1. Redistributions of source code must retain the above copyright
  22 //    notice, this list of conditions and the following disclaimer.
  23 // 2. Redistributions in binary form must reproduce the above copyright
  24 //    notice, this list of conditions and the following disclaimer in the
  25 //    documentation and/or other materials provided with the distribution.
  26 // 3. The name of the author may not be used to endorse or promote products
  27 //    derived from this software without specific prior written permission.
  28 //
  29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39
  40 #ifdef  ANTLR3_WINDOWS
  41 #pragma warning( disable : 4100 )
  42 #endif
  43
  44 /* Interface functions -standard implementations cover parser and treeparser
  45  * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
  46  * most of these functions.
  47  */
  48 static void                                     beginResync                                     (pANTLR3_BASE_RECOGNIZER recognizer);
  49 static pANTLR3_BITSET           computeErrorRecoverySet     (pANTLR3_BASE_RECOGNIZER recognizer);
  50 static void                                     endResync                                       (pANTLR3_BASE_RECOGNIZER recognizer);
  51 static void                                     beginBacktrack                          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
  52 static void                                     endBacktrack                            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
  53
  54 static void *                           match                                           (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
  55 static void                                     matchAny                                        (pANTLR3_BASE_RECOGNIZER recognizer);
  56 static void                                     mismatch                                        (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
  57 static ANTLR3_BOOLEAN           mismatchIsUnwantedToken         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
  58 static ANTLR3_BOOLEAN           mismatchIsMissingToken          (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
  59 static void                                     reportError                                     (pANTLR3_BASE_RECOGNIZER recognizer);
  60 static pANTLR3_BITSET           computeCSRuleFollow                     (pANTLR3_BASE_RECOGNIZER recognizer);
  61 static pANTLR3_BITSET           combineFollows                          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
  62 static void                                     displayRecognitionError     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
  63 static void                                     recover                                         (pANTLR3_BASE_RECOGNIZER recognizer);
  64 static void     *                               recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
  65 static void     *                               recoverFromMismatchedSet    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
  66 static ANTLR3_BOOLEAN           recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
  67 static void                                     consumeUntil                            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
  68 static void                                     consumeUntilSet                         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
  69 static pANTLR3_STACK            getRuleInvocationStack      (pANTLR3_BASE_RECOGNIZER recognizer);
  70 static pANTLR3_STACK            getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
  71 static pANTLR3_HASH_TABLE       toStrings                                       (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
  72 static ANTLR3_MARKER            getRuleMemoization                      (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
  73 static ANTLR3_BOOLEAN           alreadyParsedRule                       (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
  74 static void                                     memoize                                         (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
  75 static ANTLR3_BOOLEAN           synpred                                         (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
  76 static void                                     reset                                           (pANTLR3_BASE_RECOGNIZER recognizer);
  77 static void                                     freeBR                                          (pANTLR3_BASE_RECOGNIZER recognizer);
  78 static void *                           getCurrentInputSymbol           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
  79 static void *                           getMissingSymbol                        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION      e,
  80                                                                                                                         ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
  81 static ANTLR3_UINT32            getNumberOfSyntaxErrors         (pANTLR3_BASE_RECOGNIZER recognizer);
  82
  83 ANTLR3_API pANTLR3_BASE_RECOGNIZER
  84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
  85 {
  86     pANTLR3_BASE_RECOGNIZER recognizer;
  87
  88     // Allocate memory for the structure
  89     //
  90     recognizer      = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
  91
  92     if  (recognizer == NULL)
  93     {
  94                 // Allocation failed
  95                 //
  96                 return  NULL;
  97     }
  98
  99
 100         // If we have been supplied with a pre-existing recognizer state
 101         // then we just install it, otherwise we must create one from scratch
 102         //
 103         if      (state == NULL)
 104         {
 105                 recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
 106
 107                 if      (recognizer->state == NULL)
 108                 {
 109                         ANTLR3_FREE(recognizer);
 110                         return  NULL;
 111                 }
 112
 113                 // Initialize any new recognizer state
 114                 //
 115                 recognizer->state->errorRecovery        = ANTLR3_FALSE;
 116                 recognizer->state->lastErrorIndex       = -1;
 117                 recognizer->state->failed               = ANTLR3_FALSE;
 118                 recognizer->state->errorCount           = 0;
 119                 recognizer->state->backtracking         = 0;
 120                 recognizer->state->following            = NULL;
 121                 recognizer->state->ruleMemo             = NULL;
 122                 recognizer->state->tokenNames           = NULL;
 123                 recognizer->state->sizeHint             = sizeHint;
 124                 recognizer->state->tokSource            = NULL;
 125                 recognizer->state->tokFactory           = NULL;
 126
 127                 // Rather than check to see if we must initialize
 128                 // the stack every time we are asked for an new rewrite stream
 129                 // we just always create an empty stack and then just
 130                 // free it when the base recognizer is freed.
 131                 //
 132                 recognizer->state->rStreams             = antlr3VectorNew(0);  // We don't know the size.
 133
 134                 if      (recognizer->state->rStreams == NULL)
 135                 {
 136                         // Out of memory
 137                         //
 138                         ANTLR3_FREE(recognizer->state);
 139                         ANTLR3_FREE(recognizer);
 140                         return  NULL;
 141                 }
 142         }
 143         else
 144         {
 145                 // Install the one we were given, and do not reset it here
 146                 // as it will either already have been initialized or will
 147                 // be in a state that needs to be preserved.
 148                 //
 149                 recognizer->state = state;
 150         }
 151
 152     // Install the BR API
 153     //
 154     recognizer->alreadyParsedRule           = alreadyParsedRule;
 155     recognizer->beginResync                 = beginResync;
 156     recognizer->combineFollows              = combineFollows;
 157     recognizer->beginBacktrack              = beginBacktrack;
 158     recognizer->endBacktrack                = endBacktrack;
 159     recognizer->computeCSRuleFollow         = computeCSRuleFollow;
 160     recognizer->computeErrorRecoverySet     = computeErrorRecoverySet;
 161     recognizer->consumeUntil                = consumeUntil;
 162     recognizer->consumeUntilSet             = consumeUntilSet;
 163     recognizer->displayRecognitionError     = displayRecognitionError;
 164     recognizer->endResync                   = endResync;
 165     recognizer->exConstruct                 = antlr3MTExceptionNew;
 166     recognizer->getRuleInvocationStack      = getRuleInvocationStack;
 167     recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
 168     recognizer->getRuleMemoization          = getRuleMemoization;
 169     recognizer->match                       = match;
 170     recognizer->matchAny                    = matchAny;
 171     recognizer->memoize                     = memoize;
 172     recognizer->mismatch                    = mismatch;
 173     recognizer->mismatchIsUnwantedToken     = mismatchIsUnwantedToken;
 174     recognizer->mismatchIsMissingToken      = mismatchIsMissingToken;
 175     recognizer->recover                     = recover;
 176     recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
 177     recognizer->recoverFromMismatchedSet    = recoverFromMismatchedSet;
 178     recognizer->recoverFromMismatchedToken  = recoverFromMismatchedToken;
 179     recognizer->getNumberOfSyntaxErrors     = getNumberOfSyntaxErrors;
 180     recognizer->reportError                 = reportError;
 181     recognizer->reset                       = reset;
 182     recognizer->synpred                     = synpred;
 183     recognizer->toStrings                   = toStrings;
 184     recognizer->getCurrentInputSymbol       = getCurrentInputSymbol;
 185     recognizer->getMissingSymbol            = getMissingSymbol;
 186     recognizer->debugger                    = NULL;
 187
 188     recognizer->free = freeBR;
 189
 190     /* Initialize variables
 191      */
 192     recognizer->type                    = type;
 193
 194     recognizer->userData = NULL;
 195
 196     return  recognizer;
 197 }
 198 static void
 199 freeBR      (pANTLR3_BASE_RECOGNIZER recognizer)
 200 {
 201     pANTLR3_EXCEPTION thisE;
 202
 203         // Did we have a state allocated?
 204         //
 205         if      (recognizer->state != NULL)
 206         {
 207                 // Free any rule memoization we set up
 208                 //
 209                 if      (recognizer->state->ruleMemo != NULL)
 210                 {
 211                         recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
 212                         recognizer->state->ruleMemo = NULL;
 213                 }
 214
 215                 // Free any exception space we have left around
 216                 //
 217                 thisE = recognizer->state->exception;
 218                 if      (thisE != NULL)
 219                 {
 220                         thisE->freeEx(thisE);
 221                 }
 222
 223                 // Free any rewrite streams we have allocated
 224                 //
 225                 if      (recognizer->state->rStreams != NULL)
 226                 {
 227                         vectorFree(recognizer->state->rStreams);
 228                 }
 229
 230                 // Free up any token factory we created (error recovery for instance)
 231                 //
 232                 if      (recognizer->state->tokFactory != NULL)
 233                 {
 234                         recognizer->state->tokFactory->close(recognizer->state->tokFactory);
 235                 }
 236                 // Free the shared state memory
 237                 //
 238                 ANTLR3_FREE(recognizer->state);
 239         }
 240
 241         // Free the actual recognizer space
 242         //
 243     ANTLR3_FREE(recognizer);
 244 }
 245
 246 /**
 247  * Creates a new Mismatched Token Exception and inserts in the recognizer
 248  * exception stack.
 249  *
 250  * \param recognizer
 251  * Context pointer for this recognizer
 252  *
 253  */
 254 ANTLR3_API      void
 255 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
 256 {
 257     /* Create a basic recognition exception structure
 258      */
 259     antlr3RecognitionExceptionNew(recognizer);
 260
 261     /* Now update it to indicate this is a Mismatched token exception
 262      */
 263     recognizer->state->exception->name          = ANTLR3_MISMATCHED_EX_NAME;
 264     recognizer->state->exception->type          = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
 265
 266     return;
 267 }
 268
 269 ANTLR3_API      void
 270 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
 271 {
 272         pANTLR3_EXCEPTION                               ex;
 273         pANTLR3_LEXER                                   lexer;
 274         pANTLR3_PARSER                                  parser;
 275         pANTLR3_TREE_PARSER                             tparser;
 276
 277         pANTLR3_INPUT_STREAM                    ins;
 278         pANTLR3_INT_STREAM                              is;
 279         pANTLR3_COMMON_TOKEN_STREAM         cts;
 280         pANTLR3_TREE_NODE_STREAM            tns;
 281
 282         ins         = NULL;
 283         cts         = NULL;
 284         tns         = NULL;
 285         is          = NULL;
 286         lexer   = NULL;
 287         parser  = NULL;
 288         tparser = NULL;
 289
 290         switch  (recognizer->type)
 291         {
 292         case    ANTLR3_TYPE_LEXER:
 293
 294                 lexer   = (pANTLR3_LEXER) (recognizer->super);
 295                 ins     = lexer->input;
 296                 is      = ins->istream;
 297
 298                 break;
 299
 300         case    ANTLR3_TYPE_PARSER:
 301
 302                 parser  = (pANTLR3_PARSER) (recognizer->super);
 303                 cts     = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
 304                 is      = parser->tstream->istream;
 305
 306                 break;
 307
 308         case    ANTLR3_TYPE_TREE_PARSER:
 309
 310                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
 311                 tns     = tparser->ctnstream->tnstream;
 312                 is      = tns->istream;
 313
 314                 break;
 315
 316         default:
 317
 318                 ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
 319                 return;
 320
 321                 break;
 322         }
 323
 324         /* Create a basic exception structure
 325          */
 326         ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
 327                 (void *)ANTLR3_RECOGNITION_EX_NAME,
 328                 NULL,
 329                 ANTLR3_FALSE);
 330
 331         /* Rest of information depends on the base type of the
 332          * input stream.
 333          */
 334         switch  (is->type & ANTLR3_INPUT_MASK)
 335         {
 336         case    ANTLR3_CHARSTREAM:
 337
 338                 ex->c                   = is->_LA                       (is, 1);                                        /* Current input character                      */
 339                 ex->line                = ins->getLine                  (ins);                                          /* Line number comes from stream                */
 340                 ex->charPositionInLine  = ins->getCharPositionInLine    (ins);      /* Line offset also comes from the stream   */
 341                 ex->index               = is->index                     (is);
 342                 ex->streamName          = ins->fileName;
 343                 ex->message             = "Unexpected character";
 344                 break;
 345
 346         case    ANTLR3_TOKENSTREAM:
 347
 348                 ex->token               = tokLT                                         (cts->tstream, 1);          /* Current input token                          */
 349                 ex->line                = ((pANTLR3_COMMON_TOKEN)(ex->token))->line;
 350                 ex->charPositionInLine  = ((pANTLR3_COMMON_TOKEN)(ex->token))->charPosition;
 351                 ex->index               = cts->tstream->istream->index                                  (cts->tstream->istream);
 352                 if      (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
 353                 {
 354                         ex->streamName          = NULL;
 355                 }
 356                 else
 357                 {
 358                         ex->streamName          = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
 359                 }
 360                 ex->message             = "Unexpected token";
 361                 break;
 362
 363         case    ANTLR3_COMMONTREENODE:
 364
 365                 ex->token               = tns->_LT                                                  (tns, 1);       /* Current input tree node                      */
 366                 ex->line                = ((pANTLR3_BASE_TREE)(ex->token))->getLine                 (ex->token);
 367                 ex->charPositionInLine  = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine   (ex->token);
 368                 ex->index               = tns->istream->index                                       (tns->istream);
 369
 370                 // Are you ready for this? Deep breath now...
 371                 //
 372                 {
 373                         pANTLR3_COMMON_TREE tnode;
 374
 375                         tnode           = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
 376
 377                         if      (tnode->token    == NULL)
 378                         {
 379                                 ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
 380                         }
 381                         else
 382                         {
 383                                 if      (tnode->token->input == NULL)
 384                                 {
 385                                         ex->streamName          = NULL;
 386                                 }
 387                                 else
 388                                 {
 389                                         ex->streamName          = tnode->token->input->fileName;
 390                                 }
 391                         }
 392                         ex->message             = "Unexpected node";
 393                 }
 394                 break;
 395         }
 396
 397         ex->input                                               = is;
 398         ex->nextException                               = recognizer->state->exception; /* So we don't leak the memory */
 399         recognizer->state->exception    = ex;
 400         recognizer->state->error            = ANTLR3_TRUE;          /* Exception is outstanding */
 401
 402         return;
 403 }
 404
 405
 406 /// Match current input symbol against ttype.  Upon error, do one token
 407 /// insertion or deletion if possible.
 408 /// To turn off single token insertion or deletion error
 409 /// recovery, override mismatchRecover() and have it call
 410 /// plain mismatch(), which does not recover.  Then any error
 411 /// in a rule will cause an exception and immediate exit from
 412 /// rule.  Rule would recover by resynchronizing to the set of
 413 /// symbols that can follow rule ref.
 414 ///
 415 static void *
 416 match(  pANTLR3_BASE_RECOGNIZER recognizer,
 417                 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
 418 {
 419     pANTLR3_PARSER                      parser;
 420     pANTLR3_TREE_PARSER     tparser;
 421     pANTLR3_INT_STREAM      is;
 422         void                                    * matchedSymbol;
 423
 424     switch      (recognizer->type)
 425     {
 426                 case    ANTLR3_TYPE_PARSER:
 427
 428                         parser  = (pANTLR3_PARSER) (recognizer->super);
 429                         tparser = NULL;
 430                         is      = parser->tstream->istream;
 431
 432                         break;
 433
 434                 case    ANTLR3_TYPE_TREE_PARSER:
 435
 436                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
 437                         parser  = NULL;
 438                         is      = tparser->ctnstream->tnstream->istream;
 439
 440                         break;
 441
 442                 default:
 443
 444                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
 445                         return ANTLR3_FALSE;
 446
 447                         break;
 448     }
 449
 450         // Pick up the current input token/node for assignment to labels
 451         //
 452         matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
 453
 454     if  (is->_LA(is, 1) == ttype)
 455     {
 456                 // The token was the one we were told to expect
 457                 //
 458                 is->consume(is);                                                                        // Consume that token from the stream
 459                 recognizer->state->errorRecovery        = ANTLR3_FALSE; // Not in error recovery now (if we were)
 460                 recognizer->state->failed                       = ANTLR3_FALSE; // The match was a success
 461                 return matchedSymbol;                                                           // We are done
 462     }
 463
 464     // We did not find the expected token type, if we are backtracking then
 465     // we just set the failed flag and return.
 466     //
 467     if  (recognizer->state->backtracking > 0)
 468     {
 469                 // Backtracking is going on
 470                 //
 471                 recognizer->state->failed  = ANTLR3_TRUE;
 472                 return matchedSymbol;
 473         }
 474
 475     // We did not find the expected token and there is no backtracking
 476     // going on, so we mismatch, which creates an exception in the recognizer exception
 477     // stack.
 478     //
 479         matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
 480     return matchedSymbol;
 481 }
 482
 483 /// Consumes the next token, whatever it is, and resets the recognizer state
 484 /// so that it is not in error.
 485 ///
 486 /// \param recognizer
 487 /// Recognizer context pointer
 488 ///
 489 static void
 490 matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
 491 {
 492     pANTLR3_PARSER          parser;
 493     pANTLR3_TREE_PARSER     tparser;
 494     pANTLR3_INT_STREAM      is;
 495
 496     switch      (recognizer->type)
 497     {
 498                 case    ANTLR3_TYPE_PARSER:
 499
 500                         parser  = (pANTLR3_PARSER) (recognizer->super);
 501                         tparser = NULL;
 502                         is      = parser->tstream->istream;
 503
 504                         break;
 505
 506                 case    ANTLR3_TYPE_TREE_PARSER:
 507
 508                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
 509                         parser  = NULL;
 510                         is      = tparser->ctnstream->tnstream->istream;
 511
 512                         break;
 513
 514                 default:
 515
 516                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
 517                         return;
 518
 519                 break;
 520     }
 521     recognizer->state->errorRecovery    = ANTLR3_FALSE;
 522     recognizer->state->failed               = ANTLR3_FALSE;
 523     is->consume(is);
 524
 525     return;
 526 }
 527 ///
 528 ///
 529 static ANTLR3_BOOLEAN
 530 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
 531 {
 532         ANTLR3_UINT32 nextt;
 533
 534         nextt = is->_LA(is, 2);
 535
 536         if      (nextt == ttype)
 537         {
 538                 if      (recognizer->state->exception != NULL)
 539                 {
 540                         recognizer->state->exception->expecting = nextt;
 541                 }
 542                 return ANTLR3_TRUE;             // This token is unknown, but the next one is the one we wanted
 543         }
 544         else
 545         {
 546                 return ANTLR3_FALSE;    // Neither this token, nor the one following is the one we wanted
 547         }
 548 }
 549
 550 ///
 551 ///
 552 static ANTLR3_BOOLEAN
 553 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
 554 {
 555         ANTLR3_BOOLEAN  retcode;
 556         pANTLR3_BITSET  followClone;
 557         pANTLR3_BITSET  viableTokensFollowingThisRule;
 558
 559         if      (follow == NULL)
 560         {
 561                 // There is no information about the tokens that can follow the last one
 562                 // hence we must say that the current one we found is not a member of the
 563                 // follow set and does not indicate a missing token. We will just consume this
 564                 // single token and see if the parser works it out from there.
 565                 //
 566                 return  ANTLR3_FALSE;
 567         }
 568
 569         followClone                                             = NULL;
 570         viableTokensFollowingThisRule   = NULL;
 571
 572         // The C bitset maps are laid down at compile time by the
 573         // C code generation. Hence we cannot remove things from them
 574         // and so on. So, in order to remove EOR (if we need to) then
 575         // we clone the static bitset.
 576         //
 577         followClone = antlr3BitsetLoad(follow);
 578         if      (followClone == NULL)
 579         {
 580                 return ANTLR3_FALSE;
 581         }
 582
 583         // Compute what can follow this grammar reference
 584         //
 585         if      (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
 586         {
 587                 // EOR can follow, but if we are not the start symbol, we
 588                 // need to remove it.
 589                 //
 590                 if      (recognizer->state->following->vector->count >= 0)
 591                 {
 592                         followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
 593                 }
 594
 595                 // Now compute the visiable tokens that can follow this rule, according to context
 596                 // and make them part of the follow set.
 597                 //
 598                 viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
 599                 followClone->borInPlace(followClone, viableTokensFollowingThisRule);
 600         }
 601
 602         /// if current token is consistent with what could come after set
 603         /// then we know we're missing a token; error recovery is free to
 604         /// "insert" the missing token
 605         ///
 606         /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
 607         /// in follow set to indicate that the fall of the start symbol is
 608         /// in the set (EOF can follow).
 609         ///
 610         if      (               followClone->isMember(followClone, is->_LA(is, 1))
 611                         ||      followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
 612                 )
 613         {
 614                 retcode = ANTLR3_TRUE;
 615         }
 616         else
 617         {
 618                 retcode = ANTLR3_FALSE;
 619         }
 620
 621         if      (viableTokensFollowingThisRule != NULL)
 622         {
 623                 viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
 624         }
 625         if      (followClone != NULL)
 626         {
 627                 followClone->free(followClone);
 628         }
 629
 630         return retcode;
 631
 632 }
 633
 634 /// Factor out what to do upon token mismatch so tree parsers can behave
 635 /// differently.  Override and call mismatchRecover(input, ttype, follow)
 636 /// to get single token insertion and deletion.  Use this to turn off
 637 /// single token insertion and deletion. Override mismatchRecover
 638 /// to call this instead.
 639 ///
 640 /// \remark mismatch only works for parsers and must be overridden for anything else.
 641 ///
 642 static  void
 643 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
 644 {
 645     pANTLR3_PARSER          parser;
 646     pANTLR3_INT_STREAM      is;
 647
 648     // Install a mismatched token exception in the exception stack
 649     //
 650     antlr3MTExceptionNew(recognizer);
 651     recognizer->state->exception->expecting    = ttype;
 652
 653     switch      (recognizer->type)
 654     {
 655                 case    ANTLR3_TYPE_PARSER:
 656
 657                         parser  = (pANTLR3_PARSER) (recognizer->super);
 658                         is      = parser->tstream->istream;
 659
 660                         break;
 661
 662                 default:
 663
 664                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
 665                         return;
 666
 667                         break;
 668     }
 669
 670         if      (mismatchIsUnwantedToken(recognizer, is, ttype))
 671         {
 672                 // Create a basic recognition exception structure
 673                 //
 674             antlr3RecognitionExceptionNew(recognizer);
 675
 676                 // Now update it to indicate this is an unwanted token exception
 677                 //
 678                 recognizer->state->exception->name              = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
 679                 recognizer->state->exception->type              = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
 680
 681                 return;
 682         }
 683
 684         if      (mismatchIsMissingToken(recognizer, is, follow))
 685         {
 686                 // Create a basic recognition exception structure
 687                 //
 688             antlr3RecognitionExceptionNew(recognizer);
 689
 690                 // Now update it to indicate this is an unwanted token exception
 691                 //
 692                 recognizer->state->exception->name              = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
 693                 recognizer->state->exception->type              = ANTLR3_MISSING_TOKEN_EXCEPTION;
 694
 695                 return;
 696         }
 697
 698         // Just a mismatched token is all we can dtermine
 699         //
 700         antlr3MTExceptionNew(recognizer);
 701
 702         return;
 703 }
 704 /// Report a recognition problem.
 705 ///
 706 /// This method sets errorRecovery to indicate the parser is recovering
 707 /// not parsing.  Once in recovery mode, no errors are generated.
 708 /// To get out of recovery mode, the parser must successfully match
 709 /// a token (after a resync).  So it will go:
 710 ///
 711 ///             1. error occurs
 712 ///             2. enter recovery mode, report error
 713 ///             3. consume until token found in resynch set
 714 ///             4. try to resume parsing
 715 ///             5. next match() will reset errorRecovery mode
 716 ///
 717 /// If you override, make sure to update errorCount if you care about that.
 718 ///
 719 static void
 720 reportError                 (pANTLR3_BASE_RECOGNIZER recognizer)
 721 {
 722     if  (recognizer->state->errorRecovery == ANTLR3_TRUE)
 723     {
 724                 // Already in error recovery so don't display another error while doing so
 725                 //
 726                 return;
 727     }
 728
 729     // Signal we are in error recovery now
 730     //
 731     recognizer->state->errorRecovery = ANTLR3_TRUE;
 732
 733         // Indicate this recognizer had an error while processing.
 734         //
 735         recognizer->state->errorCount++;
 736
 737         // Call the error display routine
 738         //
 739     recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
 740 }
 741
 742 static void
 743 beginBacktrack          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
 744 {
 745         if      (recognizer->debugger != NULL)
 746         {
 747                 recognizer->debugger->beginBacktrack(recognizer->debugger, level);
 748         }
 749 }
 750
 751 static void
 752 endBacktrack            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
 753 {
 754         if      (recognizer->debugger != NULL)
 755         {
 756                 recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
 757         }
 758 }
 759 static void
 760 beginResync                 (pANTLR3_BASE_RECOGNIZER recognizer)
 761 {
 762         if      (recognizer->debugger != NULL)
 763         {
 764                 recognizer->debugger->beginResync(recognizer->debugger);
 765         }
 766 }
 767
 768 static void
 769 endResync                   (pANTLR3_BASE_RECOGNIZER recognizer)
 770 {
 771         if      (recognizer->debugger != NULL)
 772         {
 773                 recognizer->debugger->endResync(recognizer->debugger);
 774         }
 775 }
 776
 777 /// Compute the error recovery set for the current rule.
 778 /// Documentation below is from the Java implementation.
 779 ///
 780 /// During rule invocation, the parser pushes the set of tokens that can
 781 /// follow that rule reference on the stack; this amounts to
 782 /// computing FIRST of what follows the rule reference in the
 783 /// enclosing rule. This local follow set only includes tokens
 784 /// from within the rule; i.e., the FIRST computation done by
 785 /// ANTLR stops at the end of a rule.
 786 //
 787 /// EXAMPLE
 788 //
 789 /// When you find a "no viable alt exception", the input is not
 790 /// consistent with any of the alternatives for rule r.  The best
 791 /// thing to do is to consume tokens until you see something that
 792 /// can legally follow a call to r *or* any rule that called r.
 793 /// You don't want the exact set of viable next tokens because the
 794 /// input might just be missing a token--you might consume the
 795 /// rest of the input looking for one of the missing tokens.
 796 ///
 797 /// Consider grammar:
 798 ///
 799 /// a : '[' b ']'
 800 ///   | '(' b ')'
 801 ///   ;
 802 /// b : c '^' INT ;
 803 /// c : ID
 804 ///   | INT
 805 ///   ;
 806 ///
 807 /// At each rule invocation, the set of tokens that could follow
 808 /// that rule is pushed on a stack.  Here are the various "local"
 809 /// follow sets:
 810 ///
 811 /// FOLLOW(b1_in_a) = FIRST(']') = ']'
 812 /// FOLLOW(b2_in_a) = FIRST(')') = ')'
 813 /// FOLLOW(c_in_b) = FIRST('^') = '^'
 814 ///
 815 /// Upon erroneous input "[]", the call chain is
 816 ///
 817 /// a -> b -> c
 818 ///
 819 /// and, hence, the follow context stack is:
 820 ///
 821 /// depth  local follow set     after call to rule
 822 ///   0         <EOF>                    a (from main())
 823 ///   1          ']'                     b
///   2          '^'                     c
 825 ///
 826 /// Notice that ')' is not included, because b would have to have
 827 /// been called from a different context in rule a for ')' to be
 828 /// included.
 829 ///
 830 /// For error recovery, we cannot consider FOLLOW(c)
 831 /// (context-sensitive or otherwise).  We need the combined set of
 832 /// all context-sensitive FOLLOW sets--the set of all tokens that
 833 /// could follow any reference in the call chain.  We need to
 834 /// resync to one of those tokens.  Note that FOLLOW(c)='^' and if
 835 /// we resync'd to that token, we'd consume until EOF.  We need to
 836 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
 837 /// In this case, for input "[]", LA(1) is in this set so we would
 838 /// not consume anything and after printing an error rule c would
 839 /// return normally.  It would not find the required '^' though.
 840 /// At this point, it gets a mismatched token error and throws an
 841 /// exception (since LA(1) is not in the viable following token
 842 /// set).  The rule exception handler tries to recover, but finds
 843 /// the same recovery set and doesn't consume anything.  Rule b
 844 /// exits normally returning to rule a.  Now it finds the ']' (and
 845 /// with the successful match exits errorRecovery mode).
 846 ///
 847 /// So, you can see that the parser walks up call chain looking
 848 /// for the token that was a member of the recovery set.
 849 ///
 850 /// Errors are not generated in errorRecovery mode.
 851 ///
 852 /// ANTLR's error recovery mechanism is based upon original ideas:
 853 ///
 854 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
 855 ///
 856 /// and
 857 ///
 858 /// "A note on error recovery in recursive descent parsers":
 859 /// http://portal.acm.org/citation.cfm?id=947902.947905
 860 ///
 861 /// Later, Josef Grosch had some good ideas:
 862 ///
 863 /// "Efficient and Comfortable Error Recovery in Recursive Descent
 864 /// Parsers":
 865 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
 866 ///
 867 /// Like Grosch I implemented local FOLLOW sets that are combined
 868 /// at run-time upon error to avoid overhead during parsing.
 869 ///
 870 static pANTLR3_BITSET
 871 computeErrorRecoverySet     (pANTLR3_BASE_RECOGNIZER recognizer)
 872 {
 873     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
 874 }
 875
 876 /// Compute the context-sensitive FOLLOW set for current rule.
 877 /// Documentation below is from the Java runtime.
 878 ///
 879 /// This is the set of token types that can follow a specific rule
 880 /// reference given a specific call chain.  You get the set of
 881 /// viable tokens that can possibly come next (look ahead depth 1)
 882 /// given the current call chain.  Contrast this with the
 883 /// definition of plain FOLLOW for rule r:
 884 ///
 885 ///  FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
 886 ///
 887 /// where x in T* and alpha, beta in V*; T is set of terminals and
 888 /// V is the set of terminals and non terminals.  In other words,
 889 /// FOLLOW(r) is the set of all tokens that can possibly follow
/// references to r in *any* sentential form (context).  At
 891 /// runtime, however, we know precisely which context applies as
 892 /// we have the call chain.  We may compute the exact (rather
 893 /// than covering superset) set of following tokens.
 894 ///
 895 /// For example, consider grammar:
 896 ///
 897 /// stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
 898 ///      | "return" expr '.'
 899 ///      ;
 900 /// expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
 901 /// atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
 902 ///      | '(' expr ')'
 903 ///      ;
 904 ///
 905 /// The FOLLOW sets are all inclusive whereas context-sensitive
 906 /// FOLLOW sets are precisely what could follow a rule reference.
/// For input "i=(3);", here is the derivation:
 908 ///
 909 /// stat => ID '=' expr ';'
 910 ///      => ID '=' atom ('+' atom)* ';'
 911 ///      => ID '=' '(' expr ')' ('+' atom)* ';'
 912 ///      => ID '=' '(' atom ')' ('+' atom)* ';'
 913 ///      => ID '=' '(' INT ')' ('+' atom)* ';'
 914 ///      => ID '=' '(' INT ')' ';'
 915 ///
 916 /// At the "3" token, you'd have a call chain of
 917 ///
 918 ///   stat -> expr -> atom -> expr -> atom
 919 ///
 920 /// What can follow that specific nested ref to atom?  Exactly ')'
 921 /// as you can see by looking at the derivation of this specific
 922 /// input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
 923 ///
 924 /// You want the exact viable token set when recovering from a
 925 /// token mismatch.  Upon token mismatch, if LA(1) is member of
 926 /// the viable next token set, then you know there is most likely
 927 /// a missing token in the input stream.  "Insert" one by just not
 928 /// throwing an exception.
 929 ///
 930 static pANTLR3_BITSET
 931 computeCSRuleFollow         (pANTLR3_BASE_RECOGNIZER recognizer)
 932 {
 933     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
 934 }
 935
 936 /// Compute the current followset for the input stream.
 937 ///
 938 static pANTLR3_BITSET
 939 combineFollows              (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
 940 {
 941     pANTLR3_BITSET      followSet;
 942     pANTLR3_BITSET      localFollowSet;
 943     ANTLR3_UINT32       top;
 944     ANTLR3_UINT32       i;
 945
 946     top = recognizer->state->following->vector->count;
 947
 948     followSet       = antlr3BitsetNew(0);
 949         localFollowSet  = NULL;
 950
 951     for (i = top; i>0; i--)
 952     {
 953                 localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) stackGet(recognizer->state->following, i-1));
 954
 955                 if  (localFollowSet != NULL)
 956                 {
 957                         followSet->borInPlace(followSet, localFollowSet);
 958
 959                         if      (exact == ANTLR3_TRUE)
 960                         {
 961                                 if      (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
 962                                 {
 963                                         // Only leave EOR in the set if at top (start rule); this lets us know
 964                                         // if we have to include the follow(start rule); I.E., EOF
 965                                         //
 966                                         if      (i>1)
 967                                         {
 968                                                 followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
 969                                         }
 970                                 }
 971                                 else
 972                                 {
 973                                         break;  // Cannot see End Of Rule from here, just drop out
 974                                 }
 975                         }
 976                         localFollowSet->free(localFollowSet);
 977                         localFollowSet = NULL;
 978                 }
 979     }
 980
 981         if      (localFollowSet != NULL)
 982         {
 983                 localFollowSet->free(localFollowSet);
 984         }
 985     return  followSet;
 986 }
 987
 988 /// Standard/Example error display method.
/// No generic error message display function could possibly do everything correctly
/// for all possible parsers. Hence you are provided with this example routine, which
/// you should override in your parser/tree parser to do as you will.
///
/// Here we depart somewhat from the Java runtime as that has now split up a lot
/// of the error display routines into separate units. However, there is little advantage
 995 /// to this in the C version as you will probably implement all such routines as a
 996 /// separate translation unit, rather than install them all as pointers to functions
 997 /// in the base recognizer.
 998 ///
 999 static void
1000 displayRecognitionError     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1001 {
1002         pANTLR3_STRING                  ttext;
1003         pANTLR3_STRING                  ftext;
1004         pANTLR3_EXCEPTION           ex;
1005         pANTLR3_COMMON_TOKEN    theToken;
1006         pANTLR3_BASE_TREE           theBaseTree;
1007         pANTLR3_COMMON_TREE         theCommonTree;
1008
1009         // Retrieve some info for easy reading.
1010         //
1011         ex          =           recognizer->state->exception;
1012         ttext   =               NULL;
1013
1014         // See if there is a 'filename' we can use
1015         //
1016         if      (ex->streamName == NULL)
1017         {
1018                 if      (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1019                 {
1020                         ANTLR3_FPRINTF(stderr, "-end of input-(");
1021                 }
1022                 else
1023                 {
1024                         ANTLR3_FPRINTF(stderr, "-unknown source-(");
1025                 }
1026         }
1027         else
1028         {
1029                 ftext = ex->streamName->to8(ex->streamName);
1030                 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1031         }
1032
1033         // Next comes the line number
1034         //
1035
1036         ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1037         ANTLR3_FPRINTF(stderr, " : error %d : %s",
1038                                                                                 recognizer->state->exception->type,
1039                                         (pANTLR3_UINT8)    (recognizer->state->exception->message));
1040
1041
1042         // How we determine the next piece is dependent on which thing raised the
1043         // error.
1044         //
1045         switch  (recognizer->type)
1046         {
1047         case    ANTLR3_TYPE_PARSER:
1048
1049                 // Prepare the knowledge we know we have
1050                 //
1051                 theToken    = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1052                 ttext       = theToken->toString(theToken);
1053
1054                 ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1055                 if  (theToken != NULL)
1056                 {
1057                         if (theToken->type == ANTLR3_TOKEN_EOF)
1058                         {
1059                                 ANTLR3_FPRINTF(stderr, ", at <EOF>");
1060                         }
1061                         else
1062                         {
1063                                 // Guard against null text in a token
1064                                 //
1065                                 ANTLR3_FPRINTF(stderr, "\n    near %s\n    ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1066                         }
1067                 }
1068                 break;
1069
1070         case    ANTLR3_TYPE_TREE_PARSER:
1071
1072                 theBaseTree     = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1073                 ttext           = theBaseTree->toStringTree(theBaseTree);
1074
1075                 if  (theBaseTree != NULL)
1076                 {
1077                         theCommonTree   = (pANTLR3_COMMON_TREE)     theBaseTree->super;
1078
1079                         if      (theCommonTree != NULL)
1080                         {
1081                                 theToken        = (pANTLR3_COMMON_TOKEN)    theBaseTree->getToken(theBaseTree);
1082                         }
1083                         ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1084                         ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1085                 }
1086                 break;
1087
1088         default:
1089
1090                 ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1091                 return;
1092                 break;
1093         }
1094
1095         // Although this function should generally be provided by the implementation, this one
1096         // should be as helpful as possible for grammar developers and serve as an example
1097         // of what you can do with each exception type. In general, when you make up your
1098         // 'real' handler, you should debug the routine with all possible errors you expect
1099         // which will then let you be as specific as possible about all circumstances.
1100         //
1101         // Note that in the general case, errors thrown by tree parsers indicate a problem
1102         // with the output of the parser or with the tree grammar itself. The job of the parser
1103         // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1104         // at that stage should really be semantic errors that your own code determines and handles
1105         // in whatever way is appropriate.
1106         //
1107         switch  (ex->type)
1108         {
1109         case    ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1110
1111                 // Indicates that the recognizer was fed a token which seesm to be
1112                 // spurious input. We can detect this when the token that follows
1113                 // this unwanted token would normally be part of the syntactically
1114                 // correct stream. Then we can see that the token we are looking at
1115                 // is just something that should not be there and throw this exception.
1116                 //
1117                 if      (tokenNames == NULL)
1118                 {
1119                         ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1120                 }
1121                 else
1122                 {
1123                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1124                         {
1125                                 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1126                         }
1127                         else
1128                         {
1129                                 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1130                         }
1131                 }
1132                 break;
1133
1134         case    ANTLR3_MISSING_TOKEN_EXCEPTION:
1135
1136                 // Indicates that the recognizer detected that the token we just
1137                 // hit would be valid syntactically if preceeded by a particular
1138                 // token. Perhaps a missing ';' at line end or a missing ',' in an
1139                 // expression list, and such like.
1140                 //
1141                 if      (tokenNames == NULL)
1142                 {
1143                         ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1144                 }
1145                 else
1146                 {
1147                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1148                         {
1149                                 ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1150                         }
1151                         else
1152                         {
1153                                 ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1154                         }
1155                 }
1156                 break;
1157
1158         case    ANTLR3_RECOGNITION_EXCEPTION:
1159
1160                 // Indicates that the recognizer received a token
1161                 // in the input that was not predicted. This is the basic exception type
1162                 // from which all others are derived. So we assume it was a syntax error.
1163                 // You may get this if there are not more tokens and more are needed
1164                 // to complete a parse for instance.
1165                 //
1166                 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1167                 break;
1168
1169         case    ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1170
1171                 // We were expecting to see one thing and got another. This is the
1172                 // most common error if we coudl not detect a missing or unwanted token.
1173                 // Here you can spend your efforts to
1174                 // derive more useful error messages based on the expected
1175                 // token set and the last token and so on. The error following
1176                 // bitmaps do a good job of reducing the set that we were looking
1177                 // for down to something small. Knowing what you are parsing may be
1178                 // able to allow you to be even more specific about an error.
1179                 //
1180                 if      (tokenNames == NULL)
1181                 {
1182                         ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1183                 }
1184                 else
1185                 {
1186                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1187                         {
1188                                 ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1189                         }
1190                         else
1191                         {
1192                                 ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1193                         }
1194                 }
1195                 break;
1196
1197         case    ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1198
1199                 // We could not pick any alt decision from the input given
1200                 // so god knows what happened - however when you examine your grammar,
1201                 // you should. It means that at the point where the current token occurred
1202                 // that the DFA indicates nowhere to go from here.
1203                 //
1204                 ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1205
1206                 break;
1207
1208         case    ANTLR3_MISMATCHED_SET_EXCEPTION:
1209
1210                 {
1211                         ANTLR3_UINT32     count;
1212                         ANTLR3_UINT32     bit;
1213                         ANTLR3_UINT32     size;
1214                         ANTLR3_UINT32     numbits;
1215                         pANTLR3_BITSET    errBits;
1216
1217                         // This means we were able to deal with one of a set of
1218                         // possible tokens at this point, but we did not see any
1219                         // member of that set.
1220                         //
1221                         ANTLR3_FPRINTF(stderr, " : unexpected input...\n  expected one of : ");
1222
1223                         // What tokens could we have accepted at this point in the
1224                         // parse?
1225                         //
1226                         count   = 0;
1227                         errBits = antlr3BitsetLoad              (ex->expectingSet);
1228                         numbits = errBits->numBits              (errBits);
1229                         size    = errBits->size                 (errBits);
1230
1231                         if  (size > 0)
1232                         {
1233                                 // However many tokens we could have dealt with here, it is usually
1234                                 // not useful to print ALL of the set here. I arbitrarily chose 8
1235                                 // here, but you should do whatever makes sense for you of course.
1236                                 // No token number 0, so look for bit 1 and on.
1237                                 //
1238                                 for     (bit = 1; bit < numbits && count < 8 && count < size; bit++)
1239                                 {
1240                                         // TODO: This doesn;t look right - should be asking if the bit is set!!
1241                                         //
1242                                         if  (tokenNames[bit])
1243                                         {
1244                                                 ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
1245                                                 count++;
1246                                         }
1247                                 }
1248                                 ANTLR3_FPRINTF(stderr, "\n");
1249                         }
1250                         else
1251                         {
1252                                 ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1253                                 ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1254                         }
1255                 }
1256                 break;
1257
1258         case    ANTLR3_EARLY_EXIT_EXCEPTION:
1259
1260                 // We entered a loop requiring a number of token sequences
1261                 // but found a token that ended that sequence earlier than
1262                 // we should have done.
1263                 //
1264                 ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1265                 break;
1266
1267         default:
1268
1269                 // We don't handle any other exceptions here, but you can
1270                 // if you wish. If we get an exception that hits this point
1271                 // then we are just going to report what we know about the
1272                 // token.
1273                 //
1274                 ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1275                 break;
1276         }
1277
1278         // Here you have the token that was in error which if this is
1279         // the standard implementation will tell you the line and offset
1280         // and also record the address of the start of the line in the
1281         // input stream. You could therefore print the source line and so on.
1282         // Generally though, I would expect that your lexer/parser will keep
1283         // its own map of lines and source pointers or whatever as there
1284         // are a lot of specific things you need to know about the input
1285         // to do something like that.
1286         // Here is where you do it though :-).
1287         //
1288 }
1289
1290 /// Return how many syntax errors were detected by this recognizer
1291 ///
1292 static ANTLR3_UINT32
1293 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1294 {
1295         return  recognizer->state->errorCount;
1296 }
1297
1298 /// Recover from an error found on the input stream.  Mostly this is
1299 /// NoViableAlt exceptions, but could be a mismatched token that
1300 /// the match() routine could not recover from.
1301 ///
1302 static void
1303 recover                     (pANTLR3_BASE_RECOGNIZER recognizer)
1304 {
1305     // Used to compute the follow set of tokens
1306     //
1307     pANTLR3_BITSET                      followSet;
1308     pANTLR3_PARSER                      parser;
1309     pANTLR3_TREE_PARSER     tparser;
1310     pANTLR3_INT_STREAM      is;
1311
1312     switch      (recognizer->type)
1313     {
1314                 case    ANTLR3_TYPE_PARSER:
1315
1316                 parser  = (pANTLR3_PARSER) (recognizer->super);
1317                 tparser = NULL;
1318                 is              = parser->tstream->istream;
1319
1320         break;
1321
1322     case        ANTLR3_TYPE_TREE_PARSER:
1323
1324                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1325                 parser  = NULL;
1326                 is              = tparser->ctnstream->tnstream->istream;
1327
1328         break;
1329
1330     default:
1331
1332                 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1333                 return;
1334
1335         break;
1336     }
1337
1338         // Are we about to repeat the same error?
1339         //
1340     if  (recognizer->state->lastErrorIndex == is->index(is))
1341     {
1342                 // The last error was at the same token index point. This must be a case
1343                 // where LT(1) is in the recovery token set so nothing is
1344                 // consumed. Consume a single token so at least to prevent
1345                 // an infinite loop; this is a failsafe.
1346                 //
1347                 is->consume(is);
1348     }
1349
1350     // Record error index position
1351     //
1352     recognizer->state->lastErrorIndex    = is->index(is);
1353
1354     // Work out the follows set for error recovery
1355     //
1356     followSet   = recognizer->computeErrorRecoverySet(recognizer);
1357
1358     // Call resync hook (for debuggers and so on)
1359     //
1360     recognizer->beginResync(recognizer);
1361
1362     // Consume tokens until we have resynced to something in the follows set
1363     //
1364     recognizer->consumeUntilSet(recognizer, followSet);
1365
1366     // End resync hook
1367     //
1368     recognizer->endResync(recognizer);
1369
1370     // Destroy the temporary bitset we produced.
1371     //
1372     followSet->free(followSet);
1373
1374     // Reset the inError flag so we don't re-report the exception
1375     //
1376     recognizer->state->error    = ANTLR3_FALSE;
1377     recognizer->state->failed   = ANTLR3_FALSE;
1378 }
1379
1380
1381 /// Attempt to recover from a single missing or extra token.
1382 ///
1383 /// EXTRA TOKEN
1384 ///
1385 /// LA(1) is not what we are looking for.  If LA(2) has the right token,
1386 /// however, then assume LA(1) is some extra spurious token.  Delete it
1387 /// and LA(2) as if we were doing a normal match(), which advances the
1388 /// input.
1389 ///
1390 /// MISSING TOKEN
1391 ///
1392 /// If current token is consistent with what could come after
1393 /// ttype then it is ok to "insert" the missing token, else throw
1394 /// exception For example, Input "i=(3;" is clearly missing the
1395 /// ')'.  When the parser returns from the nested call to expr, it
1396 /// will have call chain:
1397 ///
1398 ///    stat -> expr -> atom
1399 ///
1400 /// and it will be trying to match the ')' at this point in the
1401 /// derivation:
1402 ///
1403 ///       => ID '=' '(' INT ')' ('+' atom)* ';'
1404 ///                          ^
1405 /// match() will see that ';' doesn't match ')' and report a
1406 /// mismatched token error.  To recover, it sees that LA(1)==';'
1407 /// is in the set of tokens that can follow the ')' token
1408 /// reference in rule atom.  It can assume that you forgot the ')'.
1409 ///
1410 /// The exception that was passed in, in the java implementation is
1411 /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
1412 /// error flag and rules cascade back when this is set.
1413 ///
1414 static void *
1415 recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1416 {
1417         pANTLR3_PARSER                    parser;
1418         pANTLR3_TREE_PARSER           tparser;
1419         pANTLR3_INT_STREAM            is;
1420         void                                    * matchedSymbol;
1421
1422         // Invoke the debugger event if there is a debugger listening to us
1423         //
1424         if      (recognizer->debugger != NULL)
1425         {
1426                 recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
1427         }
1428
1429         switch  (recognizer->type)
1430         {
1431         case    ANTLR3_TYPE_PARSER:
1432
1433                 parser  = (pANTLR3_PARSER) (recognizer->super);
1434                 tparser = NULL;
1435                 is      = parser->tstream->istream;
1436
1437                 break;
1438
1439         case    ANTLR3_TYPE_TREE_PARSER:
1440
1441                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1442                 parser  = NULL;
1443                 is      = tparser->ctnstream->tnstream->istream;
1444
1445                 break;
1446
1447         default:
1448
1449                 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1450                 return NULL;
1451
1452                 break;
1453         }
1454
1455         // Create an exception if we need one
1456         //
1457         if      (recognizer->state->exception == NULL)
1458         {
1459                 antlr3RecognitionExceptionNew(recognizer);
1460         }
1461
1462         // If the next token after the one we are looking at in the input stream
1463         // is what we are looking for then we remove the one we have discovered
1464         // from the stream by consuming it, then consume this next one along too as
1465         // if nothing had happened.
1466         //
1467         if      ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1468         {
1469                 recognizer->state->exception->type              = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1470                 recognizer->state->exception->message   = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1471
1472                 // Call resync hook (for debuggers and so on)
1473                 //
1474                 if      (recognizer->debugger != NULL)
1475                 {
1476                         recognizer->debugger->beginResync(recognizer->debugger);
1477                 }
1478
1479                 recognizer->beginResync(recognizer);
1480
1481                 // "delete" the extra token
1482                 //
1483                 recognizer->beginResync(recognizer);
1484                 is->consume(is);
1485                 recognizer->endResync(recognizer);
1486                 // End resync hook
1487                 //
1488                 if      (recognizer->debugger != NULL)
1489                 {
1490                         recognizer->debugger->endResync(recognizer->debugger);
1491                 }
1492
1493                 // Print out the error after we consume so that ANTLRWorks sees the
1494                 // token in the exception.
1495                 //
1496                 recognizer->reportError(recognizer);
1497
1498                 // Return the token we are actually matching
1499                 //
1500                 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1501
1502                 // Consume the token that the rule actually expected to get as if everything
1503                 // was hunky dory.
1504                 //
1505                 is->consume(is);
1506
1507                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1508
1509                 return  matchedSymbol;
1510         }
1511
1512         // Single token deletion (Unwanted above) did not work
1513         // so we see if we can insert a token instead by calculating which
1514         // token would be missing
1515         //
1516         if      (mismatchIsMissingToken(recognizer, is, follow))
1517         {
1518                 // We can fake the missing token and proceed
1519                 //
1520                 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1521                 recognizer->state->exception->type              = ANTLR3_MISSING_TOKEN_EXCEPTION;
1522                 recognizer->state->exception->message   = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1523                 recognizer->state->exception->token             = matchedSymbol;
1524                 recognizer->state->exception->expecting = ttype;
1525
1526                 // Print out the error after we insert so that ANTLRWorks sees the
1527                 // token in the exception.
1528                 //
1529                 recognizer->reportError(recognizer);
1530
1531                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1532
1533                 return  matchedSymbol;
1534         }
1535
1536
1537         // Neither deleting nor inserting tokens allows recovery
1538         // must just report the exception.
1539         //
1540         recognizer->state->error            = ANTLR3_TRUE;
1541         return NULL;
1542 }
1543
1544 static void *
1545 recoverFromMismatchedSet            (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1546 {
1547     pANTLR3_PARSER                      parser;
1548     pANTLR3_TREE_PARSER     tparser;
1549     pANTLR3_INT_STREAM      is;
1550         pANTLR3_COMMON_TOKEN    matchedSymbol;
1551
1552     switch      (recognizer->type)
1553     {
1554     case        ANTLR3_TYPE_PARSER:
1555
1556                 parser  = (pANTLR3_PARSER) (recognizer->super);
1557                 tparser = NULL;
1558                 is      = parser->tstream->istream;
1559
1560         break;
1561
1562     case        ANTLR3_TYPE_TREE_PARSER:
1563
1564                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1565                 parser  = NULL;
1566                 is      = tparser->ctnstream->tnstream->istream;
1567
1568         break;
1569
1570     default:
1571
1572                 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1573                 return NULL;
1574
1575         break;
1576     }
1577
1578         if      (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1579         {
1580                 // We can fake the missing token and proceed
1581                 //
1582                 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1583                 recognizer->state->exception->type      = ANTLR3_MISSING_TOKEN_EXCEPTION;
1584                 recognizer->state->exception->token     = matchedSymbol;
1585
1586                 // Print out the error after we insert so that ANTLRWorks sees the
1587                 // token in the exception.
1588                 //
1589                 recognizer->reportError(recognizer);
1590
1591                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1592
1593                 return  matchedSymbol;
1594         }
1595
1596     // TODO - Single token deletion like in recoverFromMismatchedToken()
1597     //
1598     recognizer->state->error    = ANTLR3_TRUE;
1599         recognizer->state->failed       = ANTLR3_TRUE;
1600         return NULL;
1601 }
1602
1603 /// This code is factored out from mismatched token and mismatched set
1604 ///  recovery.  It handles "single token insertion" error recovery for
1605 /// both.  No tokens are consumed to recover from insertions.  Return
1606 /// true if recovery was possible else return false.
1607 ///
1608 static ANTLR3_BOOLEAN
1609 recoverFromMismatchedElement        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1610 {
1611     pANTLR3_BITSET          viableToksFollowingRule;
1612     pANTLR3_BITSET          follow;
1613     pANTLR3_PARSER          parser;
1614     pANTLR3_TREE_PARSER     tparser;
1615     pANTLR3_INT_STREAM      is;
1616
1617     switch      (recognizer->type)
1618     {
1619     case        ANTLR3_TYPE_PARSER:
1620
1621                 parser  = (pANTLR3_PARSER) (recognizer->super);
1622                 tparser = NULL;
1623                 is      = parser->tstream->istream;
1624
1625         break;
1626
1627     case        ANTLR3_TYPE_TREE_PARSER:
1628
1629                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1630                 parser  = NULL;
1631                 is      = tparser->ctnstream->tnstream->istream;
1632
1633         break;
1634
1635     default:
1636
1637                 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1638                 return ANTLR3_FALSE;
1639
1640         break;
1641     }
1642
1643     follow      = antlr3BitsetLoad(followBits);
1644
1645     if  (follow == NULL)
1646     {
1647                 /* The follow set is NULL, which means we don't know what can come
1648                  * next, so we "hit and hope" by just signifying that we cannot
1649                  * recover, which will just cause the next token to be consumed,
1650                  * which might dig us out.
1651                  */
1652                 return  ANTLR3_FALSE;
1653     }
1654
1655     /* We have a bitmap for the follow set, hence we can compute
1656      * what can follow this grammar element reference.
1657      */
1658     if  (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1659     {
1660                 /* First we need to know which of the available tokens are viable
1661                  * to follow this reference.
1662                  */
1663                 viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
1664
1665                 /* Remove the EOR token, which we do not wish to compute with
1666                  */
1667                 follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1668                 viableToksFollowingRule->free(viableToksFollowingRule);
1669                 /* We now have the computed set of what can follow the current token
1670                  */
1671     }
1672
1673     /* We can now see if the current token works with the set of tokens
1674      * that could follow the current grammar reference. If it looks like it
1675      * is consistent, then we can "insert" that token by not throwing
1676      * an exception and assuming that we saw it.
1677      */
1678     if  ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1679     {
1680                 /* report the error, but don't cause any rules to abort and stuff
1681                  */
1682                 recognizer->reportError(recognizer);
1683                 if      (follow != NULL)
1684                 {
1685                         follow->free(follow);
1686                 }
1687                 recognizer->state->error                        = ANTLR3_FALSE;
1688                 recognizer->state->failed                       = ANTLR3_FALSE;
1689                 return ANTLR3_TRUE;     /* Success in recovery  */
1690     }
1691
1692     if  (follow != NULL)
1693     {
1694                 follow->free(follow);
1695     }
1696
1697     /* We could not find anything viable to do, so this is going to
1698      * cause an exception.
1699      */
1700     return  ANTLR3_FALSE;
1701 }
1702
1703 /// Eat tokens from the input stream until we get one of JUST the right type
1704 ///
1705 static void
1706 consumeUntil    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1707 {
1708     ANTLR3_UINT32                       ttype;
1709     pANTLR3_PARSER                      parser;
1710     pANTLR3_TREE_PARSER     tparser;
1711     pANTLR3_INT_STREAM      is;
1712
1713     switch      (recognizer->type)
1714     {
1715                 case    ANTLR3_TYPE_PARSER:
1716
1717                         parser  = (pANTLR3_PARSER) (recognizer->super);
1718                         tparser = NULL;
1719                         is      = parser->tstream->istream;
1720
1721                         break;
1722
1723                 case    ANTLR3_TYPE_TREE_PARSER:
1724
1725                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1726                         parser  = NULL;
1727                         is      = tparser->ctnstream->tnstream->istream;
1728
1729                         break;
1730
1731                 default:
1732
1733                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1734                         return;
1735
1736                         break;
1737     }
1738
1739     // What do have at the moment?
1740     //
1741     ttype       = is->_LA(is, 1);
1742
1743     // Start eating tokens until we get to the one we want.
1744     //
1745     while   (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1746     {
1747                 is->consume(is);
1748                 ttype   = is->_LA(is, 1);
1749     }
1750 }
1751
1752 /// Eat tokens from the input stream until we find one that
1753 /// belongs to the supplied set.
1754 ///
1755 static void
1756 consumeUntilSet                     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1757 {
1758     ANTLR3_UINT32           ttype;
1759     pANTLR3_PARSER          parser;
1760     pANTLR3_TREE_PARSER     tparser;
1761     pANTLR3_INT_STREAM      is;
1762
1763     switch      (recognizer->type)
1764     {
1765                 case    ANTLR3_TYPE_PARSER:
1766
1767                         parser  = (pANTLR3_PARSER) (recognizer->super);
1768                         tparser = NULL;
1769                         is      = parser->tstream->istream;
1770
1771                         break;
1772
1773                 case    ANTLR3_TYPE_TREE_PARSER:
1774
1775                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1776                         parser  = NULL;
1777                         is      = tparser->ctnstream->tnstream->istream;
1778
1779                         break;
1780
1781                 default:
1782
1783                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1784                         return;
1785
1786                         break;
1787     }
1788
1789     // What do have at the moment?
1790     //
1791     ttype       = is->_LA(is, 1);
1792
1793     // Start eating tokens until we get to one we want.
1794     //
1795     while   (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1796     {
1797                 is->consume(is);
1798                 ttype   = is->_LA(is, 1);
1799     }
1800 }
1801
1802 /** Return the rule invocation stack (how we got here in the parse.
1803  *  In the java version Ter just asks the JVM for all the information
1804  *  but in C we don't get this information, so I am going to do nothing
1805  *  right now.
1806  */
1807 static pANTLR3_STACK
1808 getRuleInvocationStack              (pANTLR3_BASE_RECOGNIZER recognizer)
1809 {
1810     return NULL;
1811 }
1812
1813 static pANTLR3_STACK
1814 getRuleInvocationStackNamed         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1815 {
1816     return NULL;
1817 }
1818
1819 /** Convenience method for template rewrites - NYI.
1820  */
1821 static pANTLR3_HASH_TABLE
1822 toStrings                           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
1823 {
1824     return NULL;
1825 }
1826
1827 static  void ANTLR3_CDECL
1828 freeIntTrie    (void * trie)
1829 {
1830     ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1831 }
1832
1833
1834 /** Pointer to a function to return whether the rule has parsed input starting at the supplied
1835  *  start index before. If the rule has not parsed input starting from the supplied start index,
1836  *  then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
1837  *  then it will return the point where it last stopped parsing after that start point.
1838  *
1839  * \remark
1840  * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1841  * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1842  * version of the table.
1843  */
1844 static ANTLR3_MARKER
1845 getRuleMemoization                  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1846 {
1847     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1848      */
1849     pANTLR3_INT_TRIE    ruleList;
1850     ANTLR3_MARKER       stopIndex;
1851     pANTLR3_TRIE_ENTRY  entry;
1852
1853     /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1854      * as we will need it eventually if we are being asked for the memo here.
1855      */
1856     entry       = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1857
1858     if  (entry == NULL)
1859     {
1860                 /* Did not find it, so create a new one for it, with a bit depth based on the
1861                  * size of the input stream. We need the bit depth to incorporate the number if
1862                  * bits required to represent the largest possible stop index in the input, which is the
1863                  * last character. An int stream is free to return the largest 64 bit offset if it has
1864                  * no idea of the size, but you should remember that this will cause the leftmost
1865                  * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1866                  */
1867                 ruleList    = antlr3IntTrieNew(63);     /* Depth is theoretically 64 bits, but probably not ;-) */
1868
1869                 if (ruleList != NULL)
1870                 {
1871                         recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1872                 }
1873
1874                 /* We cannot have a stopIndex in a trie we have just created of course
1875                  */
1876                 return  MEMO_RULE_UNKNOWN;
1877     }
1878
1879     ruleList    = (pANTLR3_INT_TRIE) (entry->data.ptr);
1880
1881     /* See if there is a stop index associated with the supplied start index.
1882      */
1883     stopIndex   = 0;
1884
1885     entry = ruleList->get(ruleList, ruleParseStart);
1886     if (entry != NULL)
1887     {
1888                 stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1889     }
1890
1891     if  (stopIndex == 0)
1892     {
1893                 return MEMO_RULE_UNKNOWN;
1894     }
1895
1896     return  stopIndex;
1897 }
1898
1899 /** Has this rule already parsed input at the current index in the
1900  *  input stream?  Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1901  *  if we have not.
1902  *
1903  *  This method has a side-effect: if we have seen this input for
1904  *  this rule and successfully parsed before, then seek ahead to
1905  *  1 past the stop token matched for this rule last time.
1906  */
1907 static ANTLR3_BOOLEAN
1908 alreadyParsedRule                   (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1909 {
1910     ANTLR3_MARKER                       stopIndex;
1911     pANTLR3_LEXER                       lexer;
1912     pANTLR3_PARSER                      parser;
1913     pANTLR3_TREE_PARSER     tparser;
1914     pANTLR3_INT_STREAM      is;
1915
1916     switch      (recognizer->type)
1917     {
1918                 case    ANTLR3_TYPE_PARSER:
1919
1920                         parser  = (pANTLR3_PARSER) (recognizer->super);
1921                         tparser = NULL;
1922                         lexer   = NULL;
1923                         is      = parser->tstream->istream;
1924
1925                         break;
1926
1927                 case    ANTLR3_TYPE_TREE_PARSER:
1928
1929                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1930                         parser  = NULL;
1931                         lexer   = NULL;
1932                         is      = tparser->ctnstream->tnstream->istream;
1933
1934                         break;
1935
1936                 case    ANTLR3_TYPE_LEXER:
1937
1938                         lexer   = (pANTLR3_LEXER)   (recognizer->super);
1939                         parser  = NULL;
1940                         tparser = NULL;
1941                         is      = lexer->input->istream;
1942                         break;
1943
1944                 default:
1945
1946                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1947                         return ANTLR3_FALSE;
1948
1949                         break;
1950     }
1951
1952     /* See if we have a memo marker for this.
1953      */
1954     stopIndex       = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
1955
1956     if  (stopIndex  == MEMO_RULE_UNKNOWN)
1957     {
1958                 return ANTLR3_FALSE;
1959     }
1960
1961     if  (stopIndex == MEMO_RULE_FAILED)
1962     {
1963                 recognizer->state->failed = ANTLR3_TRUE;
1964     }
1965     else
1966     {
1967                 is->seek(is, stopIndex+1);
1968     }
1969
1970     /* If here then the rule was executed for this input already
1971      */
1972     return  ANTLR3_TRUE;
1973 }
1974
1975 /** Record whether or not this rule parsed the input at this position
1976  *  successfully.
1977  */
1978 static void
1979 memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1980 {
1981     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1982      */
1983     pANTLR3_INT_TRIE        ruleList;
1984     pANTLR3_TRIE_ENTRY      entry;
1985     ANTLR3_MARKER           stopIndex;
1986     pANTLR3_LEXER           lexer;
1987     pANTLR3_PARSER          parser;
1988     pANTLR3_TREE_PARSER     tparser;
1989     pANTLR3_INT_STREAM      is;
1990
1991     switch      (recognizer->type)
1992     {
1993                 case    ANTLR3_TYPE_PARSER:
1994
1995                         parser  = (pANTLR3_PARSER) (recognizer->super);
1996                         tparser = NULL;
1997                         is      = parser->tstream->istream;
1998
1999                         break;
2000
2001                 case    ANTLR3_TYPE_TREE_PARSER:
2002
2003                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2004                         parser  = NULL;
2005                         is      = tparser->ctnstream->tnstream->istream;
2006
2007                         break;
2008
2009                 case    ANTLR3_TYPE_LEXER:
2010
2011                         lexer   = (pANTLR3_LEXER)   (recognizer->super);
2012                         parser  = NULL;
2013                         tparser = NULL;
2014                         is              = lexer->input->istream;
2015                         break;
2016
2017                 default:
2018
2019                         ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2020                         return;
2021
2022                         break;
2023     }
2024
2025     stopIndex   = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2026
2027     entry       = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2028
2029     if  (entry != NULL)
2030     {
2031                 ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2032
2033                 /* If we don't already have this entry, append it. The memoize trie does not
2034                  * accept duplicates so it won't add it if already there and we just ignore the
2035                  * return code as we don't care if it is there already.
2036                  */
2037                 ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2038     }
2039 }
2040 /** A syntactic predicate.  Returns true/false depending on whether
2041  *  the specified grammar fragment matches the current input stream.
2042  *  This resets the failed instance var afterwards.
2043  */
2044 static ANTLR3_BOOLEAN
2045 synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2046 {
2047     ANTLR3_MARKER   start;
2048     pANTLR3_PARSER          parser;
2049     pANTLR3_TREE_PARSER     tparser;
2050     pANTLR3_INT_STREAM      is;
2051
2052     switch      (recognizer->type)
2053     {
2054                 case    ANTLR3_TYPE_PARSER:
2055
2056                         parser  = (pANTLR3_PARSER) (recognizer->super);
2057                         tparser = NULL;
2058                         is      = parser->tstream->istream;
2059
2060                         break;
2061
2062                 case    ANTLR3_TYPE_TREE_PARSER:
2063
2064                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2065                         parser  = NULL;
2066                         is      = tparser->ctnstream->tnstream->istream;
2067
2068                         break;
2069
2070                 default:
2071
2072                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2073                         return ANTLR3_FALSE;
2074
2075                         break;
2076     }
2077
2078     /* Begin backtracking so we can get back to where we started after trying out
2079      * the syntactic predicate.
2080      */
2081     start   = is->mark(is);
2082     recognizer->state->backtracking++;
2083
2084     /* Try the syntactical predicate
2085      */
2086     predicate(ctx);
2087
2088     /* Reset
2089      */
2090     is->rewind(is, start);
2091     recognizer->state->backtracking--;
2092
2093     if  (recognizer->state->failed == ANTLR3_TRUE)
2094     {
2095                 /* Predicate failed
2096                  */
2097                 recognizer->state->failed = ANTLR3_FALSE;
2098                 return  ANTLR3_FALSE;
2099     }
2100     else
2101     {
2102                 /* Predicate was successful
2103                  */
2104                 recognizer->state->failed       = ANTLR3_FALSE;
2105                 return  ANTLR3_TRUE;
2106     }
2107 }
2108
2109 static void
2110 reset(pANTLR3_BASE_RECOGNIZER recognizer)
2111 {
2112     if  (recognizer->state->following != NULL)
2113     {
2114                 stackFree(recognizer->state->following);
2115     }
2116
2117         // Reset the state flags
2118         //
2119         recognizer->state->errorRecovery        = ANTLR3_FALSE;
2120         recognizer->state->lastErrorIndex       = -1;
2121         recognizer->state->failed                       = ANTLR3_FALSE;
2122         recognizer->state->errorCount           = 0;
2123         recognizer->state->backtracking         = 0;
2124         recognizer->state->following            = NULL;
2125
2126         if      (recognizer->state != NULL)
2127         {
2128                 if      (recognizer->state->ruleMemo != NULL)
2129                 {
2130                         recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2131                         recognizer->state->ruleMemo = antlr3IntTrieNew(15);     /* 16 bit depth is enough for 32768 rules! */
2132                 }
2133         }
2134
2135
2136     // Install a new following set
2137     //
2138     recognizer->state->following   = antlr3StackNew(8);
2139
2140 }
2141
2142 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2143 // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
2144 //
2145 static void *
2146 getCurrentInputSymbol           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2147 {
2148         return tokLT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2149 }
2150
2151 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2152 // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
2153 //
2154 static void *
2155 getMissingSymbol                        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION      e,
2156                                                                         ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2157 {
2158         pANTLR3_TOKEN_STREAM                    ts;
2159         pANTLR3_COMMON_TOKEN                    token;
2160         pANTLR3_COMMON_TOKEN                    current;
2161         pANTLR3_STRING                                  text;
2162
2163         // Dereference the standard pointers
2164         //
2165         ts              = (pANTLR3_TOKEN_STREAM)istream->super;
2166
2167         // Work out what to use as the current symbol to make a line and offset etc
2168         // If we are at EOF, we use the token before EOF
2169         //
2170         current = tokLT(ts, 1);
2171         if      (current->type == ANTLR3_TOKEN_EOF)
2172         {
2173                 current = tokLT(ts, -1);
2174         }
2175
2176         // Create a new empty token
2177         //
2178         if      (recognizer->state->tokFactory == NULL)
2179         {
2180                 // We don't yet have a token factory for making tokens
2181                 // we just need a fake one using the input stream of the current
2182                 // token.
2183                 //
2184                 recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2185         }
2186         token   = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2187
2188         // Set some of the token properties based on the current token
2189         //
2190         token->line         = current->line;
2191         token->charPosition = current->charPosition;
2192         token->channel      = ANTLR3_TOKEN_DEFAULT_CHANNEL;
2193         token->type         = expectedTokenType;
2194     token->user1        = current->user1;
2195     token->user2        = current->user2;
2196     token->user3        = current->user3;
2197     token->custom       = current->custom;
2198     token->lineStart    = current->lineStart;
2199
2200         // Create the token text that shows it has been inserted
2201         //
2202         token->setText8(token, (pANTLR3_UINT8)"<missing ");
2203         text = token->getText(token);
2204
2205         if      (text != NULL)
2206         {
2207                 text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2208                 text->append8(text, (const char *)">");
2209         }
2210
2211         // Finally return the pointer to our new token
2212         //
2213         return  token;
2214 }
2215
2216
2217 #ifdef  ANTLR3_WINDOWS
2218 #pragma warning( default : 4100 )
2219 #endif
2220
2221 /// @}
2222 ///
2223