1 //////////////////////////////////////////////////////////////////////////////
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef lexical_scanning_stream_h
26 #define lexical_scanning_stream_h
28 /////////////////////////////////////////////////////////////////////////////
29 // Class LexScanner implements a lexical scanner with buffering.
30 /////////////////////////////////////////////////////////////////////////////
33 #include <AD/generic/generic.h>
34 #include <AD/automata/lexer.h>
36 /////////////////////////////////////////////////////////////////////////////
37 // The implementation of the Lexer is hidden for safety.
38 /////////////////////////////////////////////////////////////////////////////
// LexScanner: a lexical scanner that reads from a std::istream through an
// internal character buffer.  It inherits privately from Lexer and passes
// the scanner tables to it, so the Lexer interface is not exposed to
// users of this class.
// NOTE(review): several lines of this declaration are not present in this
// listing (e.g. access specifiers, the header of the Option enumeration,
// the declaration of open(), and the closing brace).  The notes below
// describe only what is visible here.
39 class LexScanner
: private Lexer
{
// Copying is intentionally unsupported: both operations are only declared.
41 LexScanner(const LexScanner
&); // no copy constructor
42 void operator = (const LexScanner
&); // no assignment
46 /////////////////////////////////////////////////////////////////////
47 // Make inherited types visible
// (Super is not declared in the visible lines; presumably it names the
// Lexer base -- TODO confirm.)
48 /////////////////////////////////////////////////////////////////////
50 typedef Super::State State
;
51 typedef Super::Symbol Symbol
;
52 typedef Super::Offset Offset
;
53 typedef Super::Rule Rule
;
55 /////////////////////////////////////////////////////////////////////
// Buffering options.  These enumerators are used as the `Option` type
// taken by fill_buffer(), advance() and fast_advance(); the enum header
// itself is not visible in this listing.
57 /////////////////////////////////////////////////////////////////////
59 { Line_buffering
= 0, // use line buffering (for interactive scanners)
60 Block_buffering
= 1 // use block buffering (faster)
65 ////////////////////////////////////////////////////////////////////////
66 // Buffers and states information.
67 ////////////////////////////////////////////////////////////////////////
68 char * buffer
; // caches the last bunch of input
69 char * limit
; // end of buffer
70 char * read_limit
; // end of data
71 char * cursor
; // points to the next input character
72 char * tok
; // points to the current token
73 Bool anchored
; // are we at the beginning of the line?
74 int len
; // the length of the current token
// save_char: when >= 0, the scanning routines write it back to *cursor
// before they start scanning.
75 int save_char
; // character saved
// current_context: selects the start state (see begin() and advance()).
76 int current_context
;// current context
78 /////////////////////////////////////////////////////////////////////
79 // Method to replenish the input buffer. Returns the number of
// characters read -- TODO confirm; the scanning routines in this file
// only compare the result with 0, which they treat as "no more input".
// Parameters: stream is the input source; read_size and option are the
// values that were passed to advance()/fast_advance().
81 /////////////////////////////////////////////////////////////////////
82 virtual int fill_buffer(std::istream
& stream
, int read_size
, Option option
);
86 ////////////////////////////////////////////////////////////////////////
87 // Constructor and destructor
// The constructor forwards the six scanner tables to Lexer, zeroes the
// buffer pointers (buffer, limit, read_limit, cursor) and then calls
// open().  The remaining members are not set in the initializer list;
// presumably open() initializes them -- TODO confirm.
// The destructor calls close().
88 ////////////////////////////////////////////////////////////////////////
89 LexScanner( const Offset base_table
[],
90 const State check_table
[],
91 const State def_table
[],
92 const State next_table
[],
93 const Rule rule_table
[],
94 const unsigned char equiv_table
[]
96 : Lexer(base_table
, check_table
, def_table
, next_table
,
97 rule_table
, equiv_table
),
98 buffer(0), limit(0), read_limit(0), cursor(0) { open(); }
99 virtual ~LexScanner() { close(); }
101 ////////////////////////////////////////////////////////////////////////
103 // open() -- begin scanning (set up buffer, etc)
104 // close() -- end scanning (clean up buffer, etc)
105 // begin() -- start at a new context
106 // advance() -- scan and get the next rule
107 // fast_advance() -- scan without backtracking
//
// begin() only records the context number (default 0) in
// current_context.  advance() and fast_advance() take the input stream,
// a read size (default 4096) and a buffering option (default
// Line_buffering); the last two are handed to fill_buffer().
108 ////////////////////////////////////////////////////////////////////////
110 virtual void close();
111 void begin(int context
= 0) { current_context
= context
; }
112 Rule
advance(std::istream
&, int = 4096, Option
= Line_buffering
);
113 Rule
fast_advance(std::istream
&, int = 4096, Option
= Line_buffering
);
115 ////////////////////////////////////////////////////////////////////////
117 // token() -- return the current matched token
118 // length() -- return the length of the current token
119 // peek_buffer() -- return the cached input characters
120 // peek_length() -- return the number of characters cached
//
// token() returns tok and length() returns len; the scanning routines
// set both to 0 on end of file or when nothing can be matched.
// peek_buffer() returns cursor and peek_length() returns
// read_limit - cursor, i.e. the buffered input not yet consumed.
121 ////////////////////////////////////////////////////////////////////////
122 char * token() const { return tok
; }
123 int length() const { return len
; }
124 char * peek_buffer() const { return cursor
; }
125 int peek_length() const { return read_limit
- cursor
; }
127 ////////////////////////////////////////////////////////////////////////
129 // error() -- print error message and abort
// Virtual; takes the name of a file and a line number
// (presumably the location to report -- TODO confirm).
130 ////////////////////////////////////////////////////////////////////////
131 virtual void error(const char file_name
[], int line_number
);
134 ////////////////////////////////////////////////////////////////////////////
135 // For efficiency, the scanning functions are inlined.
136 ////////////////////////////////////////////////////////////////////////////
138 /////////////////////////////////////////////////////////////////////////////
139 // Advance the machine and return the matching rule.
140 // The convention of accept state number is as follows:
141 // 0 --- not an accept state; has at least one outgoing transition
142 // -1 --- not an accept state; has no outgoing transition (dead end)
143 // r > 0 --- an accept state for rule $r-1$ (zero based) with at least
144 // one outgoing transition.
145 // r < -1 --- an accept state for rule $-r-2$ with no outgoing transition.
147 // Returns the rule number (zero based) normally.
148 // Returns EOF if end of file is found.
149 // Returns EOF-1 if the scanner is jammed.
150 /////////////////////////////////////////////////////////////////////////////
// LexScanner::advance -- scan the next token with backtracking.
//
//   stream     input source, passed to fill_buffer() when the buffer is
//              exhausted
//   read_size  passed through to fill_buffer()
//   option     buffering option, passed through to fill_buffer()
//
// Visible return paths: EOF when no more input can be read and
// tok == read_limit; last_accept_rule - 1 (a zero-based rule number)
// after backtracking to the last accepting position.
// NOTE(review): parts of this function body (the scan loop, the state
// transition, the `do_it` label and the end of the function) are not
// present in this listing; the comments below cover the visible
// statements only.
151 inline Lexer::Rule
LexScanner::advance
152 (std::istream
& stream
, int read_size
, LexScanner::Option option
)
// Start state: two start states per context, the second one being used
// when the scanner is anchored at the beginning of a line.
153 { register State s
= current_context
* 2 + (anchored
? 2 : 1);
154 register unsigned char c
;
155 register Rule r
= 0, last_accept_rule
;
// NOTE(review): last_accept_pos is compared with 0 further down, but no
// initialization of it is visible in this listing -- confirm it is set
// to 0 before scanning starts.
156 register char * last_accept_pos
;
// Put back the character that was saved earlier, if any.
158 if (save_char
>= 0) *cursor
= save_char
;
159 last_accept_rule
= 0;
// Buffer exhausted: try to refill it.  A result of 0 means that no more
// input is available.
164 if (cursor
== read_limit
)
165 if (fill_buffer(stream
, read_size
, option
) == 0) {
// Nothing is pending when tok == read_limit: report end of file.
166 if (tok
== read_limit
) {
167 len
= 0; tok
= 0; return EOF
;
// Otherwise turn r into the matching "no outgoing transition" code
// (0 becomes -1, r > 0 becomes -r-1) and jump to the rule handling.
169 r
= -r
-1; goto do_it
;
// Fetch the next input character and step past it.
172 c
= (unsigned char)*cursor
++;
177 if (r
> 0) { // a backtrackable accept state; keep looking ahead
// Remember the rule and the input position for later backtracking.
178 last_accept_rule
= r
;
179 last_accept_pos
= cursor
;
180 } else if (r
== 0) { // not an accept state; scan further
182 } else if (r
== -1) { // not an accept state and is a dead end
183 if (last_accept_pos
!= 0) { // backtrack
184 cursor
= last_accept_pos
;
// The next scan is anchored if the last consumed character ended a line.
188 anchored
= cursor
[-1] == '\n';
// Accept codes are one-based; callers receive zero-based rule numbers.
189 return last_accept_rule
- 1;
190 } else { // no backtrackable position, error!!!
191 tok
= 0; len
= 0; // nothing possible
194 } else { // a dead end accept state is found
198 anchored
= cursor
[-1] == '\n';
204 /////////////////////////////////////////////////////////////////////////////
205 // This is similar to advance() above except that no backtracking is done:
206 // i.e. we return the first rule that matches rather than the rule that
207 // matches the longest input. The scanning algorithm is simpler and faster
208 // with this routine. The lexer should be generated without using the
209 // Backtracking option.
210 /////////////////////////////////////////////////////////////////////////////
// LexScanner::fast_advance -- scan the next token without backtracking.
//
//   stream     input source, passed to fill_buffer() when the buffer is
//              exhausted
//   read_size  passed through to fill_buffer()
//   option     buffering option, passed through to fill_buffer()
//
// Visible return paths: EOF when no more input can be read and
// tok == read_limit; EOF-1 when input runs out at any other point.
// In both cases tok and len are reset to 0.
// NOTE(review): parts of this function body (loop structure, handling of
// the accepted rule, end of the function) are not present in this
// listing; the comments below cover the visible statements only.
211 inline Lexer::Rule
LexScanner::fast_advance
212 (std::istream
& stream
, int read_size
, LexScanner::Option option
)
// Start state: two start states per context, the second one being used
// when the scanner is anchored at the beginning of a line.
213 { register State s
= current_context
* 2 + (anchored
? 2 : 1);
// Put back the character that was saved earlier, if any.
215 if (save_char
>= 0) *cursor
= save_char
;
// Buffer exhausted: try to refill it.  A result of 0 means that no more
// input is available.
217 if (cursor
== read_limit
)
218 if (fill_buffer(stream
, read_size
, option
) == 0) {
219 if (tok
== read_limit
) { // end of file
220 tok
= 0; len
= 0; return EOF
;
// Input ended while something was still pending: report EOF-1.
222 tok
= 0; len
= 0; return EOF
-1;
// Fetch the next input character and step past it.
225 register unsigned char c
= (unsigned char)*cursor
++;
// Take the transition on c; a resulting state of 0 is handled as failure.
226 if ((s
= go (s
, c
)) == 0) { // error state??
227 tok
= 0; len
= 0; // nothing possible
// Look up the accept code of the new state.
230 register Rule r
= accept(s
);
// The next scan is anchored if the last consumed character ended a line.
235 anchored
= cursor
[-1] == '\n';