1 //////////////////////////////////////////////////////////////////////////////
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are welcomed to incorporate any part of ADLib and Prop into
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
18 // This software is still under development and we welcome(read crave for)
19 // any suggestions and help from the users.
21 // Allen Leung (leunga@cs.nyu.edu)
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef lexer_buffer_h
26 #define lexer_buffer_h
29 #include <AD/generic/generic.h> // generic definitions
30 #include <AD/automata/dfatable.h> // automata tables
31 #include <AD/automata/regexmat.h> // regular expression string matcher
33 //////////////////////////////////////////////////////////////////////////////
34 // This class implements a backtrackable buffer for implementing
36 //////////////////////////////////////////////////////////////////////////////
37 class LexerBuffer
: public DFATables
{
39 ///////////////////////////////////////////////////////////////////////////
41 ///////////////////////////////////////////////////////////////////////////
// Local shorthands: Super names the base class DFATables, and the remaining
// aliases re-export the types Super declares so they can be used unqualified
// inside LexerBuffer.
42 typedef DFATables Super
;
43 typedef Super::Symbol Symbol
;
44 typedef Super::State State
;
45 typedef Super::Offset Offset
;
// Rule is also the return type of RegexMatch::MatchText.
46 typedef Super::Rule Rule
;
49 ///////////////////////////////////////////////////////////////////////////
50 // Internal buffer and other members
51 ///////////////////////////////////////////////////////////////////////////
// RegexMatch is a friend: its MatchText member reads and updates the
// buffer state declared below directly.
52 friend class RegexMatch
;
53 char * buffer
; // start of the buffer
54 char * buffer_limit
; // end of the buffer
55 char * cursor
; // pointer to the start of the current token
56 char * cursor_limit
; // pointer to the end of the current token
57 Bool anchored
; // are we at the beginning of a line?
58 Bool pinned
; // is the buffer pinned?
59 Bool more_input
; // is there more input?
60 int saved_char
; // character at cursor_limit that was overwritten by the token's
  // terminating '\0'; written back only when it is >= 0
61 int current_context
; // current context number
66 ///////////////////////////////////////////////////////////////////////////
67 // Constructor and destructor
68 ///////////////////////////////////////////////////////////////////////////
// Construct a lexer buffer from a character pointer and a size.
// NOTE(review): presumably the initial buffer and its length -- confirm
// against the out-of-line definition.
71 LexerBuffer(char *, size_t);
// Virtual destructor, so derived buffers can be destroyed through a
// LexerBuffer pointer.
72 virtual ~LexerBuffer();
74 ///////////////////////////////////////////////////////////////////////////
76 ///////////////////////////////////////////////////////////////////////////
77 inline int capacity () const { return buffer_limit
- buffer
; }
78 inline int length () const { return cursor_limit
- cursor
; }
79 inline operator const char * () const { return cursor
; }
80 inline operator char * () { return cursor
; }
81 inline int context () const { return current_context
; }
82 inline Bool
is_anchored() const { return anchored
; }
83 inline const char * text () const { return cursor
; }
84 inline char * text () { return cursor
; }
85 inline char operator [] (int i
) const { return cursor
[i
]; }
86 inline char& operator [] (int i
) { return cursor
[i
]; }
87 inline int lookahead () const { return saved_char
; }
89 ///////////////////////////////////////////////////////////////////////////
91 ///////////////////////////////////////////////////////////////////////////
// Set the buffer from a character pointer and a size.  Virtual, so derived
// classes may override it.
93 virtual void set_buffer (char *, size_t);
// Overload taking only the character pointer.
// NOTE(review): presumably the size is derived from the string itself --
// confirm against the out-of-line definition.
94 void set_buffer (char *);
95 inline void set_anchored(Bool a
= true) { anchored
= a
; }
96 inline void set_context (int c
= 0) { current_context
= c
; }
97 inline void push_back (int n
)
98 { if (saved_char
>= 0) *cursor_limit
= saved_char
;
105 ///////////////////////////////////////////////////////////////////////////
106 // Default method for filling the buffer.
107 // This should be redefined in derived classes
108 ///////////////////////////////////////////////////////////////////////////
// Called by RegexMatch::MatchText when the matcher runs out of buffered
// input.  A return value of 0 makes MatchText give up and return -1.
// NOTE(review): the result is presumably the number of characters added --
// confirm against the out-of-line definition.
109 virtual size_t fill_buffer();
// Overridable hook.  NOTE(review): presumably invoked when the input is
// exhausted; its call site is not visible in this part of the file.
110 virtual void end_of_file();
// Error handler invoked by RegexMatch::MatchText when no rule matches.
// start is the position at which matching began (the old cursor_limit).
// NOTE(review): stop is presumably where the matcher gave up -- confirm
// against MatchText.
111 virtual void error(const char * start
, const char * stop
);
114 //////////////////////////////////////////////////////////////////////////////
115 // Tokenization method
116 //////////////////////////////////////////////////////////////////////////////
117 inline LexerBuffer::Rule
RegexMatch::MatchText
118 (State
, LexerBuffer
& B
, const char *) const
120 // restore the saved character
121 if (B
.saved_char
>= 0) *B
.cursor_limit
= B
.saved_char
;
122 // Locate the next token
124 Rule r
= MatchText (B
.current_context
*2 + (B
.anchored
? 2 : 1),
125 B
.cursor_limit
, B
.buffer_limit
- B
.cursor_limit
,
128 // No match, invoke the error handler
129 B
.error (B
.cursor_limit
, nxt
);
131 } else if (r
== -1) {
132 // Out of buffer, invoke the fill buffer handler
133 size_t len
= B
.fill_buffer();
134 if (len
== 0) return -1;
138 B
.cursor
= B
.cursor_limit
;
139 B
.cursor_limit
= (char*)nxt
;
140 B
.anchored
= B
.cursor_limit
[-1] == '\n';
141 B
.saved_char
= *B
.cursor_limit
;
142 B
.cursor_limit
[0] = '\0';