/** \file
 * Base implementation of an antlr 3 lexer.
 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these that are parser specific (usual
 * default implementation of base recognizer).
 */
11 // [The "BSD licence"]
12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13 // http://www.temporal-wave.com
14 // http://www.linkedin.com/in/jimidle
16 // All rights reserved.
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions
21 // 1. Redistributions of source code must retain the above copyright
22 // notice, this list of conditions and the following disclaimer.
23 // 2. Redistributions in binary form must reproduce the above copyright
24 // notice, this list of conditions and the following disclaimer in the
25 // documentation and/or other materials provided with the distribution.
26 // 3. The name of the author may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <antlr3lexer.h>
42 static void mTokens (pANTLR3_LEXER lexer
);
43 static void setCharStream (pANTLR3_LEXER lexer
, pANTLR3_INPUT_STREAM input
);
44 static void pushCharStream (pANTLR3_LEXER lexer
, pANTLR3_INPUT_STREAM input
);
45 static void popCharStream (pANTLR3_LEXER lexer
);
47 static void emitNew (pANTLR3_LEXER lexer
, pANTLR3_COMMON_TOKEN token
);
48 static pANTLR3_COMMON_TOKEN
emit (pANTLR3_LEXER lexer
);
49 static ANTLR3_BOOLEAN
matchs (pANTLR3_LEXER lexer
, ANTLR3_UCHAR
* string
);
50 static ANTLR3_BOOLEAN
matchc (pANTLR3_LEXER lexer
, ANTLR3_UCHAR c
);
51 static ANTLR3_BOOLEAN
matchRange (pANTLR3_LEXER lexer
, ANTLR3_UCHAR low
, ANTLR3_UCHAR high
);
52 static void matchAny (pANTLR3_LEXER lexer
);
53 static void recover (pANTLR3_LEXER lexer
);
54 static ANTLR3_UINT32
getLine (pANTLR3_LEXER lexer
);
55 static ANTLR3_MARKER
getCharIndex (pANTLR3_LEXER lexer
);
56 static ANTLR3_UINT32
getCharPositionInLine (pANTLR3_LEXER lexer
);
57 static pANTLR3_STRING
getText (pANTLR3_LEXER lexer
);
58 static pANTLR3_COMMON_TOKEN
nextToken (pANTLR3_TOKEN_SOURCE toksource
);
60 static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER rec
, pANTLR3_UINT8
* tokenNames
);
61 static void reportError (pANTLR3_BASE_RECOGNIZER rec
);
62 static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer
, pANTLR3_INT_STREAM istream
);
63 static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer
, pANTLR3_INT_STREAM istream
, pANTLR3_EXCEPTION e
,
64 ANTLR3_UINT32 expectedTokenType
, pANTLR3_BITSET_LIST follow
);
66 static void reset (pANTLR3_BASE_RECOGNIZER rec
);
68 static void freeLexer (pANTLR3_LEXER lexer
);
71 ANTLR3_API pANTLR3_LEXER
72 antlr3LexerNew(ANTLR3_UINT32 sizeHint
, pANTLR3_RECOGNIZER_SHARED_STATE state
)
75 pANTLR3_COMMON_TOKEN specialT
;
79 lexer
= (pANTLR3_LEXER
) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER
));
86 /* Now we need to create the base recognizer
88 lexer
->rec
= antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER
, sizeHint
, state
);
90 if (lexer
->rec
== NULL
)
95 lexer
->rec
->super
= lexer
;
97 lexer
->rec
->displayRecognitionError
= displayRecognitionError
;
98 lexer
->rec
->reportError
= reportError
;
99 lexer
->rec
->reset
= reset
;
100 lexer
->rec
->getCurrentInputSymbol
= getCurrentInputSymbol
;
101 lexer
->rec
->getMissingSymbol
= getMissingSymbol
;
103 /* Now install the token source interface
105 if (lexer
->rec
->state
->tokSource
== NULL
)
107 lexer
->rec
->state
->tokSource
= (pANTLR3_TOKEN_SOURCE
)ANTLR3_MALLOC(sizeof(ANTLR3_TOKEN_SOURCE
));
109 if (lexer
->rec
->state
->tokSource
== NULL
)
111 lexer
->rec
->free(lexer
->rec
);
116 lexer
->rec
->state
->tokSource
->super
= lexer
;
118 /* Install the default nextToken() method, which may be overridden
119 * by generated code, or by anything else in fact.
121 lexer
->rec
->state
->tokSource
->nextToken
= nextToken
;
122 lexer
->rec
->state
->tokSource
->strFactory
= NULL
;
124 lexer
->rec
->state
->tokFactory
= NULL
;
127 /* Install the lexer API
129 lexer
->setCharStream
= setCharStream
;
130 lexer
->mTokens
= (void (*)(void *))(mTokens
);
131 lexer
->setCharStream
= setCharStream
;
132 lexer
->pushCharStream
= pushCharStream
;
133 lexer
->popCharStream
= popCharStream
;
135 lexer
->emitNew
= emitNew
;
136 lexer
->matchs
= matchs
;
137 lexer
->matchc
= matchc
;
138 lexer
->matchRange
= matchRange
;
139 lexer
->matchAny
= matchAny
;
140 lexer
->recover
= recover
;
141 lexer
->getLine
= getLine
;
142 lexer
->getCharIndex
= getCharIndex
;
143 lexer
->getCharPositionInLine
= getCharPositionInLine
;
144 lexer
->getText
= getText
;
145 lexer
->free
= freeLexer
;
147 /* Initialise the eof token
149 specialT
= &(lexer
->rec
->state
->tokSource
->eofToken
);
150 antlr3SetTokenAPI (specialT
);
151 specialT
->type
= ANTLR3_TOKEN_EOF
;
152 specialT
->factoryMade
= ANTLR3_TRUE
; // Prevent things trying to free() it
153 specialT
->strFactory
= NULL
;
155 // Initialize the skip token.
157 specialT
= &(lexer
->rec
->state
->tokSource
->skipToken
);
158 antlr3SetTokenAPI (specialT
);
159 specialT
->type
= ANTLR3_TOKEN_INVALID
;
160 specialT
->factoryMade
= ANTLR3_TRUE
; // Prevent things trying to free() it
161 specialT
->strFactory
= NULL
;
166 reset (pANTLR3_BASE_RECOGNIZER rec
)
172 lexer
->rec
->state
->token
= NULL
;
173 lexer
->rec
->state
->type
= ANTLR3_TOKEN_INVALID
;
174 lexer
->rec
->state
->channel
= ANTLR3_TOKEN_DEFAULT_CHANNEL
;
175 lexer
->rec
->state
->tokenStartCharIndex
= -1;
176 lexer
->rec
->state
->tokenStartCharPositionInLine
= -1;
177 lexer
->rec
->state
->tokenStartLine
= -1;
179 lexer
->rec
->state
->text
= NULL
;
181 if (lexer
->input
!= NULL
)
183 lexer
->input
->istream
->seek(lexer
->input
->istream
, 0);
189 /// Returns the next available token from the current input stream.
192 /// Points to the implementation of a token source. The lexer is
193 /// addressed by the super structure pointer.
196 /// The next token in the current input stream or the EOF token
197 /// if there are no more tokens.
200 /// Write remarks for nextToken here.
204 ANTLR3_INLINE
static pANTLR3_COMMON_TOKEN
205 nextTokenStr (pANTLR3_TOKEN_SOURCE toksource
)
209 lexer
= (pANTLR3_LEXER
)(toksource
->super
);
211 /// Loop until we get a non skipped token or EOF
215 // Get rid of any previous token (token factory takes care of
216 // any de-allocation when this token is finally used up.
218 lexer
->rec
->state
->token
= NULL
;
219 lexer
->rec
->state
->error
= ANTLR3_FALSE
; // Start out without an exception
220 lexer
->rec
->state
->failed
= ANTLR3_FALSE
;
224 // Now call the matching rules and see if we can generate a new token
228 // Record the start of the token in our input stream.
230 lexer
->rec
->state
->channel
= ANTLR3_TOKEN_DEFAULT_CHANNEL
;
231 lexer
->rec
->state
->tokenStartCharIndex
= lexer
->input
->istream
->index(lexer
->input
->istream
);
232 lexer
->rec
->state
->tokenStartCharPositionInLine
= lexer
->input
->getCharPositionInLine(lexer
->input
);
233 lexer
->rec
->state
->tokenStartLine
= lexer
->input
->getLine(lexer
->input
);
234 lexer
->rec
->state
->text
= NULL
;
236 if (lexer
->input
->istream
->_LA(lexer
->input
->istream
, 1) == ANTLR3_CHARSTREAM_EOF
)
238 // Reached the end of the current stream, nothing more to do if this is
239 // the last in the stack.
241 pANTLR3_COMMON_TOKEN teof
= &(toksource
->eofToken
);
243 teof
->start
= lexer
->getCharIndex(lexer
);
244 teof
->stop
= lexer
->getCharIndex(lexer
);
245 teof
->line
= lexer
->getLine(lexer
);
246 teof
->factoryMade
= ANTLR3_TRUE
; // This isn't really manufactured but it stops things from trying to free it
250 lexer
->rec
->state
->token
= NULL
;
251 lexer
->rec
->state
->error
= ANTLR3_FALSE
; // Start out without an exception
252 lexer
->rec
->state
->failed
= ANTLR3_FALSE
;
254 // Call the generated lexer, see if it can get a new token together.
256 lexer
->mTokens(lexer
->ctx
);
258 if (lexer
->rec
->state
->error
== ANTLR3_TRUE
)
260 // Recognition exception, report it and try to recover.
262 lexer
->rec
->state
->failed
= ANTLR3_TRUE
;
263 lexer
->rec
->reportError(lexer
->rec
);
264 lexer
->recover(lexer
);
268 if (lexer
->rec
->state
->token
== NULL
)
270 // Emit the real token, which adds it in to the token stream basically
274 else if (lexer
->rec
->state
->token
== &(toksource
->skipToken
))
276 // A real token could have been generated, but "Computer say's naaaaah" and it
277 // it is just something we need to skip altogether.
282 // Good token, not skipped, not EOF token
284 return lexer
->rec
->state
->token
;
292 * Default implementation of the nextToken() call for a lexer.
295 * Points to the implementation of a token source. The lexer is
296 * addressed by the super structure pointer.
299 * The next token in the current input stream or the EOF token
300 * if there are no more tokens in any input stream in the stack.
302 * Write detailed description for nextToken here.
305 * Write remarks for nextToken here.
309 static pANTLR3_COMMON_TOKEN
310 nextToken (pANTLR3_TOKEN_SOURCE toksource
)
312 pANTLR3_COMMON_TOKEN tok
;
314 // Find the next token in the current stream
316 tok
= nextTokenStr(toksource
);
318 // If we got to the EOF token then switch to the previous
319 // input stream if there were any and just return the
320 // EOF if there are none. We must check the next token
321 // in any outstanding input stream we pop into the active
322 // role to see if it was sitting at EOF after PUSHing the
323 // stream we just consumed, otherwise we will return EOF
324 // on the reinstalled input stream, when in actual fact
325 // there might be more input streams to POP before the
326 // real EOF of the whole logical inptu stream. Hence we
327 // use a while loop here until we find somethign in the stream
328 // that isn't EOF or we reach the actual end of the last input
329 // stream on the stack.
331 while (tok
->type
== ANTLR3_TOKEN_EOF
)
335 lexer
= (pANTLR3_LEXER
)(toksource
->super
);
337 if (lexer
->rec
->state
->streams
!= NULL
&& lexer
->rec
->state
->streams
->vector
->count
> 0)
339 // We have another input stream in the stack so we
340 // need to revert to it, then resume the loop to check
341 // it wasn't sitting at EOF itself.
343 lexer
->popCharStream(lexer
);
344 tok
= nextTokenStr(toksource
);
348 // There were no more streams on the input stack
349 // so this EOF is the 'real' logical EOF for
350 // the input stream. So we just exit the loop and
351 // return the EOF we have found.
358 // return whatever token we have, which may be EOF
363 ANTLR3_API pANTLR3_LEXER
364 antlr3LexerNewStream(ANTLR3_UINT32 sizeHint
, pANTLR3_INPUT_STREAM input
, pANTLR3_RECOGNIZER_SHARED_STATE state
)
368 // Create a basic lexer first
370 lexer
= antlr3LexerNew(sizeHint
, state
);
374 // Install the input stream and reset the lexer
376 setCharStream(lexer
, input
);
382 static void mTokens (pANTLR3_LEXER lexer
)
384 if (lexer
) // Fool compiler, avoid pragmas
386 ANTLR3_FPRINTF(stderr
, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
391 reportError (pANTLR3_BASE_RECOGNIZER rec
)
393 // Indicate this recognizer had an error while processing.
395 rec
->state
->errorCount
++;
397 rec
->displayRecognitionError(rec
, rec
->state
->tokenNames
);
400 #ifdef ANTLR3_WINDOWS
401 #pragma warning( disable : 4100 )
404 /** Default lexer error handler (works for 8 bit streams only!!!)
407 displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer
, pANTLR3_UINT8
* tokenNames
)
410 pANTLR3_EXCEPTION ex
;
411 pANTLR3_STRING ftext
;
413 lexer
= (pANTLR3_LEXER
)(recognizer
->super
);
414 ex
= lexer
->rec
->state
->exception
;
416 // See if there is a 'filename' we can use
418 if (ex
->name
== NULL
)
420 ANTLR3_FPRINTF(stderr
, "-unknown source-(");
424 ftext
= ex
->streamName
->to8(ex
->streamName
);
425 ANTLR3_FPRINTF(stderr
, "%s(", ftext
->chars
);
428 ANTLR3_FPRINTF(stderr
, "%d) ", recognizer
->state
->exception
->line
);
429 ANTLR3_FPRINTF(stderr
, ": lexer error %d :\n\t%s at offset %d, ",
431 (pANTLR3_UINT8
) (ex
->message
),
432 ex
->charPositionInLine
+1
437 width
= ANTLR3_UINT32_CAST(( (pANTLR3_UINT8
)(lexer
->input
->data
) + (lexer
->input
->size(lexer
->input
) )) - (pANTLR3_UINT8
)(ex
->index
));
443 ANTLR3_FPRINTF(stderr
, "near '%c' :\n", ex
->c
);
447 ANTLR3_FPRINTF(stderr
, "near char(%#02X) :\n", (ANTLR3_UINT8
)(ex
->c
));
449 ANTLR3_FPRINTF(stderr
, "\t%.*s\n", width
> 20 ? 20 : width
,((pANTLR3_UINT8
)ex
->index
));
453 ANTLR3_FPRINTF(stderr
, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
454 ANTLR3_FPRINTF(stderr
, "\t The lexer was matching from line %d, offset %d, which\n\t ",
455 (ANTLR3_UINT32
)(lexer
->rec
->state
->tokenStartLine
),
456 (ANTLR3_UINT32
)(lexer
->rec
->state
->tokenStartCharPositionInLine
)
458 width
= ANTLR3_UINT32_CAST(((pANTLR3_UINT8
)(lexer
->input
->data
)+(lexer
->input
->size(lexer
->input
))) - (pANTLR3_UINT8
)(lexer
->rec
->state
->tokenStartCharIndex
));
462 ANTLR3_FPRINTF(stderr
, "looks like this:\n\t\t%.*s\n", width
> 20 ? 20 : width
,(pANTLR3_UINT8
)(lexer
->rec
->state
->tokenStartCharIndex
));
466 ANTLR3_FPRINTF(stderr
, "is also the end of the line, so you must check your lexer rules\n");
472 static void setCharStream (pANTLR3_LEXER lexer
, pANTLR3_INPUT_STREAM input
)
474 /* Install the input interface
476 lexer
->input
= input
;
478 /* We may need a token factory for the lexer; we don't destroy any existing factory
479 * until the lexer is destroyed, as people may still be using the tokens it produced.
480 * TODO: Later I will provide a dup() method for a token so that it can extract itself
481 * out of the factory.
483 if (lexer
->rec
->state
->tokFactory
== NULL
)
485 lexer
->rec
->state
->tokFactory
= antlr3TokenFactoryNew(input
);
489 /* When the input stream is being changed on the fly, rather than
490 * at the start of a new lexer, then we must tell the tokenFactory
491 * which input stream to adorn the tokens with so that when they
492 * are asked to provide their original input strings they can
493 * do so from the correct text stream.
495 lexer
->rec
->state
->tokFactory
->setInputStream(lexer
->rec
->state
->tokFactory
, input
);
498 /* Propagate the string factory so that we preserve the encoding form from
501 if (lexer
->rec
->state
->tokSource
->strFactory
== NULL
)
503 lexer
->rec
->state
->tokSource
->strFactory
= input
->strFactory
;
505 // Set the newly acquired string factory up for our pre-made tokens
508 if (lexer
->rec
->state
->tokSource
->eofToken
.strFactory
== NULL
)
510 lexer
->rec
->state
->tokSource
->eofToken
.strFactory
= input
->strFactory
;
514 /* This is a lexer, install the appropriate exception creator
516 lexer
->rec
->exConstruct
= antlr3RecognitionExceptionNew
;
518 /* Set the current token to nothing
520 lexer
->rec
->state
->token
= NULL
;
521 lexer
->rec
->state
->text
= NULL
;
522 lexer
->rec
->state
->tokenStartCharIndex
= -1;
524 /* Copy the name of the char stream to the token source
526 lexer
->rec
->state
->tokSource
->fileName
= input
->fileName
;
531 * Change to a new input stream, remembering the old one.
534 * Pointer to the lexer instance to switch input streams for.
537 * New input stream to install as the current one.
539 * Switches the current character input stream to
540 * a new one, saving the old one, which we will revert to at the end of this
544 pushCharStream (pANTLR3_LEXER lexer
, pANTLR3_INPUT_STREAM input
)
546 // Do we need a new input stream stack?
548 if (lexer
->rec
->state
->streams
== NULL
)
550 // This is the first call to stack a new
551 // stream and so we must create the stack first.
553 lexer
->rec
->state
->streams
= antlr3StackNew(0);
555 if (lexer
->rec
->state
->streams
== NULL
)
557 // Could not do this, we just fail to push it.
558 // TODO: Consider if this is what we want to do, but then
559 // any programmer can override this method to do something else.
564 // We have a stack, so we can save the current input stream
567 lexer
->input
->istream
->mark(lexer
->input
->istream
);
568 stackPush(lexer
->rec
->state
->streams
, lexer
->input
, NULL
);
570 // And now we can install this new one
572 lexer
->setCharStream(lexer
, input
);
577 * Stops using the current input stream and reverts to any prior
578 * input stream on the stack.
581 * Description of parameter lexer.
583 * Pointer to a function that abandons the current input stream, whether it
584 * is empty or not and reverts to the previous stacked input stream.
587 * The function fails silently if there are no prior input streams.
590 popCharStream (pANTLR3_LEXER lexer
)
592 pANTLR3_INPUT_STREAM input
;
594 // If we do not have a stream stack or we are already at the
595 // stack bottom, then do nothing.
597 if (lexer
->rec
->state
->streams
!= NULL
&& lexer
->rec
->state
->streams
->vector
->count
> 0)
599 // We just leave the current stream to its fate, we do not close
600 // it or anything as we do not know what the programmer intended
601 // for it. This method can always be overridden of course.
602 // So just find out what was currently saved on the stack and use
603 // that now, then pop it from the stack.
605 input
= (pANTLR3_INPUT_STREAM
)(lexer
->rec
->state
->streams
->top
);
606 stackPop(lexer
->rec
->state
->streams
);
608 // Now install the stream as the current one.
610 lexer
->setCharStream(lexer
, input
);
611 lexer
->input
->istream
->rewindLast(lexer
->input
->istream
);
616 static void emitNew (pANTLR3_LEXER lexer
, pANTLR3_COMMON_TOKEN token
)
618 lexer
->rec
->state
->token
= token
; /* Voila! */
621 static pANTLR3_COMMON_TOKEN
622 emit (pANTLR3_LEXER lexer
)
624 pANTLR3_COMMON_TOKEN token
;
626 /* We could check pointers to token factories and so on, but
627 * we are in code that we want to run as fast as possible
628 * so we are not checking any errors. So make sure you have installed an input stream before
629 * trying to emit a new token.
631 token
= lexer
->rec
->state
->tokFactory
->newToken(lexer
->rec
->state
->tokFactory
);
633 /* Install the supplied information, and some other bits we already know
634 * get added automatically, such as the input stream it is associated with
635 * (though it can all be overridden of course)
637 token
->type
= lexer
->rec
->state
->type
;
638 token
->channel
= lexer
->rec
->state
->channel
;
639 token
->start
= lexer
->rec
->state
->tokenStartCharIndex
;
640 token
->stop
= lexer
->getCharIndex(lexer
) - 1;
641 token
->line
= lexer
->rec
->state
->tokenStartLine
;
642 token
->charPosition
= lexer
->rec
->state
->tokenStartCharPositionInLine
;
644 if (lexer
->rec
->state
->text
!= NULL
)
646 token
->textState
= ANTLR3_TEXT_STRING
;
647 token
->tokText
.text
= lexer
->rec
->state
->text
;
651 token
->textState
= ANTLR3_TEXT_NONE
;
653 token
->lineStart
= lexer
->input
->currentLine
;
654 token
->user1
= lexer
->rec
->state
->user1
;
655 token
->user2
= lexer
->rec
->state
->user2
;
656 token
->user3
= lexer
->rec
->state
->user3
;
657 token
->custom
= lexer
->rec
->state
->custom
;
659 lexer
->rec
->state
->token
= token
;
665 * Free the resources allocated by a lexer
668 freeLexer (pANTLR3_LEXER lexer
)
670 // This may have ben a delegate or delegator lexer, in which case the
671 // state may already have been freed (and set to NULL therefore)
672 // so we ignore the state if we don't have it.
674 if (lexer
->rec
->state
!= NULL
)
676 if (lexer
->rec
->state
->streams
!= NULL
)
678 stackFree(lexer
->rec
->state
->streams
);
680 if (lexer
->rec
->state
->tokFactory
!= NULL
)
682 lexer
->rec
->state
->tokFactory
->close(lexer
->rec
->state
->tokFactory
);
683 lexer
->rec
->state
->tokFactory
= NULL
;
685 if (lexer
->rec
->state
->tokSource
!= NULL
)
687 ANTLR3_FREE(lexer
->rec
->state
->tokSource
);
688 lexer
->rec
->state
->tokSource
= NULL
;
691 if (lexer
->rec
!= NULL
)
693 lexer
->rec
->free(lexer
->rec
);
699 /** Implementation of matchs for the lexer, overrides any
700 * base implementation in the base recognizer.
703 * Note that the generated code lays down arrays of ints for constant
704 * strings so that they are int UTF32 form!
706 static ANTLR3_BOOLEAN
707 matchs(pANTLR3_LEXER lexer
, ANTLR3_UCHAR
* string
)
709 while (*string
!= ANTLR3_STRING_TERMINATOR
)
711 if (lexer
->input
->istream
->_LA(lexer
->input
->istream
, 1) != (*string
))
713 if (lexer
->rec
->state
->backtracking
> 0)
715 lexer
->rec
->state
->failed
= ANTLR3_TRUE
;
719 lexer
->rec
->exConstruct(lexer
->rec
);
720 lexer
->rec
->state
->failed
= ANTLR3_TRUE
;
722 /* TODO: Implement exception creation more fully perhaps
724 lexer
->recover(lexer
);
728 /* Matched correctly, do consume it
730 lexer
->input
->istream
->consume(lexer
->input
->istream
);
733 /* Reset any failed indicator
735 lexer
->rec
->state
->failed
= ANTLR3_FALSE
;
742 /** Implementation of matchc for the lexer, overrides any
743 * base implementation in the base recognizer.
746 * Note that the generated code lays down arrays of ints for constant
747 * strings so that they are int UTF32 form!
749 static ANTLR3_BOOLEAN
750 matchc(pANTLR3_LEXER lexer
, ANTLR3_UCHAR c
)
752 if (lexer
->input
->istream
->_LA(lexer
->input
->istream
, 1) == c
)
754 /* Matched correctly, do consume it
756 lexer
->input
->istream
->consume(lexer
->input
->istream
);
758 /* Reset any failed indicator
760 lexer
->rec
->state
->failed
= ANTLR3_FALSE
;
765 /* Failed to match, exception and recovery time.
767 if (lexer
->rec
->state
->backtracking
> 0)
769 lexer
->rec
->state
->failed
= ANTLR3_TRUE
;
773 lexer
->rec
->exConstruct(lexer
->rec
);
775 /* TODO: Implement exception creation more fully perhaps
777 lexer
->recover(lexer
);
782 /** Implementation of match range for the lexer, overrides any
783 * base implementation in the base recognizer.
786 * Note that the generated code lays down arrays of ints for constant
787 * strings so that they are int UTF32 form!
789 static ANTLR3_BOOLEAN
790 matchRange(pANTLR3_LEXER lexer
, ANTLR3_UCHAR low
, ANTLR3_UCHAR high
)
794 /* What is in the stream at the moment?
796 c
= lexer
->input
->istream
->_LA(lexer
->input
->istream
, 1);
797 if ( c
>= low
&& c
<= high
)
799 /* Matched correctly, consume it
801 lexer
->input
->istream
->consume(lexer
->input
->istream
);
803 /* Reset any failed indicator
805 lexer
->rec
->state
->failed
= ANTLR3_FALSE
;
810 /* Failed to match, execption and recovery time.
813 if (lexer
->rec
->state
->backtracking
> 0)
815 lexer
->rec
->state
->failed
= ANTLR3_TRUE
;
819 lexer
->rec
->exConstruct(lexer
->rec
);
821 /* TODO: Implement exception creation more fully
823 lexer
->recover(lexer
);
829 matchAny (pANTLR3_LEXER lexer
)
831 lexer
->input
->istream
->consume(lexer
->input
->istream
);
835 recover (pANTLR3_LEXER lexer
)
837 lexer
->input
->istream
->consume(lexer
->input
->istream
);
841 getLine (pANTLR3_LEXER lexer
)
843 return lexer
->input
->getLine(lexer
->input
);
847 getCharPositionInLine (pANTLR3_LEXER lexer
)
849 return lexer
->input
->getCharPositionInLine(lexer
->input
);
852 static ANTLR3_MARKER
getCharIndex (pANTLR3_LEXER lexer
)
854 return lexer
->input
->istream
->index(lexer
->input
->istream
);
857 static pANTLR3_STRING
858 getText (pANTLR3_LEXER lexer
)
860 if (lexer
->rec
->state
->text
)
862 return lexer
->rec
->state
->text
;
865 return lexer
->input
->substr(
867 lexer
->rec
->state
->tokenStartCharIndex
,
868 lexer
->getCharIndex(lexer
) - lexer
->input
->charByteSize
874 getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer
, pANTLR3_INT_STREAM istream
)
880 getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer
, pANTLR3_INT_STREAM istream
, pANTLR3_EXCEPTION e
,
881 ANTLR3_UINT32 expectedTokenType
, pANTLR3_BITSET_LIST follow
)