simple.cc - generated code example
[prop.git] / include / AD / automata / scanner.h
blobd227f34fc1dcfd8bfc0cf68af15fdb00c9aa0063
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
21 // Allen Leung
22 // 1994
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef lexical_scanning_stream_h
26 #define lexical_scanning_stream_h
28 /////////////////////////////////////////////////////////////////////////////
29 // Class LexScanner implements a lexical scanner with buffering.
30 /////////////////////////////////////////////////////////////////////////////
32 #include <iostream>
33 #include <AD/generic/generic.h>
34 #include <AD/automata/lexer.h>
36 /////////////////////////////////////////////////////////////////////////////
37 // The implementation of the Lexer is hidden for safety.
38 /////////////////////////////////////////////////////////////////////////////
39 class LexScanner : private Lexer {
41 LexScanner(const LexScanner&); // no copy constructor
42 void operator = (const LexScanner&); // no assignment
44 public:
46 /////////////////////////////////////////////////////////////////////
47 // Make inherited types visible
48 /////////////////////////////////////////////////////////////////////
49 typedef Lexer Super;
50 typedef Super::State State;
51 typedef Super::Symbol Symbol;
52 typedef Super::Offset Offset;
53 typedef Super::Rule Rule;
55 /////////////////////////////////////////////////////////////////////
56 // Options
57 /////////////////////////////////////////////////////////////////////
58 enum Option
59 { Line_buffering = 0, // use line buffering (for interactive scanners)
60 Block_buffering = 1 // use block buffering (faster)
63 protected:
65 ////////////////////////////////////////////////////////////////////////
66 // Buffers and states information.
67 ////////////////////////////////////////////////////////////////////////
68 char * buffer; // caches the last bunch of input
69 char * limit; // end of buffer
70 char * read_limit; // end of data
71 char * cursor; // points to the next input character
72 char * tok; // points to the current token
73 Bool anchored; // are we at the beginning of the line?
74 int len; // the length of the current token
75 int save_char; // character saved
76 int current_context;// current context
78 /////////////////////////////////////////////////////////////////////
79 // Method to replenish the input buffer. Returns the number of
80 // characters read.
81 /////////////////////////////////////////////////////////////////////
82 virtual int fill_buffer(std::istream& stream, int read_size, Option option);
84 public:
86 ////////////////////////////////////////////////////////////////////////
87 // Constructor and destructor
88 ////////////////////////////////////////////////////////////////////////
89 LexScanner( const Offset base_table [],
90 const State check_table [],
91 const State def_table [],
92 const State next_table [],
93 const Rule rule_table [],
94 const unsigned char equiv_table []
96 : Lexer(base_table, check_table, def_table, next_table,
97 rule_table, equiv_table),
98 buffer(0), limit(0), read_limit(0), cursor(0) { open(); }
99 virtual ~LexScanner() { close(); }
101 ////////////////////////////////////////////////////////////////////////
102 // Scanning:
103 // open() -- begin scanning (set up buffer, etc)
104 // close() -- end scanning (clean up buffer, etc)
105 // begin() -- start at a new context
106 // advance() -- scan and get the next rule
107 // fast_advance() -- scan without backtracking
108 ////////////////////////////////////////////////////////////////////////
109 virtual void open();
110 virtual void close();
111 void begin(int context = 0) { current_context = context; }
112 Rule advance(std::istream&, int = 4096, Option = Line_buffering);
113 Rule fast_advance(std::istream&, int = 4096, Option = Line_buffering);
115 ////////////////////////////////////////////////////////////////////////
116 // Buffer management
117 // token() -- return the current matched token
118 // length() -- return the length of the current token
119 // peek_buffer() -- return the cached input characters
120 // peek_length() -- return the number of characters cached
121 ////////////////////////////////////////////////////////////////////////
122 char * token() const { return tok; }
123 int length() const { return len; }
124 char * peek_buffer() const { return cursor; }
125 int peek_length() const { return read_limit - cursor; }
127 ////////////////////////////////////////////////////////////////////////
128 // Miscellaneous:
129 // error() -- print error message and abort
130 ////////////////////////////////////////////////////////////////////////
131 virtual void error(const char file_name[], int line_number);
134 ////////////////////////////////////////////////////////////////////////////
135 // For efficiency, the scanning functions are inlined.
136 ////////////////////////////////////////////////////////////////////////////
138 /////////////////////////////////////////////////////////////////////////////
139 // Advance the machine and return the matching rule.
140 // The convention of accept state number is as follows:
141 // 0 --- not an accept state and have at least one outgoing state
142 // -1 --- not an accept state and have no outgoing state.
143 // r > 0 --- an accept state for rule $r-1$(zero based) with at least
144 // one outgoing state.
145 // r < -1 --- an accept state for rule $-r-2$ with no outgoing state.
147 // Returns the rule number (zero based) normally.
148 // Returns EOF if end of file is found.
149 // Returns EOF-1 if the scanner is jammed.
150 /////////////////////////////////////////////////////////////////////////////
151 inline Lexer::Rule LexScanner::advance
152 (std::istream& stream, int read_size, LexScanner::Option option)
153 { register State s = current_context * 2 + (anchored ? 2 : 1);
154 register unsigned char c;
155 register Rule r = 0, last_accept_rule;
156 register char * last_accept_pos;
158 if (save_char >= 0) *cursor = save_char;
159 last_accept_rule = 0;
160 last_accept_pos = 0;
161 tok = cursor;
163 for (;;) {
164 if (cursor == read_limit)
165 if (fill_buffer(stream, read_size, option) == 0) {
166 if (tok == read_limit) {
167 len = 0; tok = 0; return EOF;
168 } else {
169 r = -r-1; goto do_it;
172 c = (unsigned char)*cursor++;
173 s = go (s, c);
174 r = accept(s);
176 do_it:
177 if (r > 0) { // a backtrackable accept state; keep looking ahead
178 last_accept_rule = r;
179 last_accept_pos = cursor;
180 } else if (r == 0) { // not an accept state; scan further
181 // scan scan scan
182 } else if (r == -1) { // not an accept state and is a dead end
183 if (last_accept_pos != 0) { // backtrack
184 cursor = last_accept_pos;
185 len = cursor - tok;
186 save_char = *cursor;
187 *cursor = '\0';
188 anchored = cursor[-1] == '\n';
189 return last_accept_rule - 1;
190 } else { // no backtrackable position, error!!!
191 tok = 0; len = 0; // nothing possible
192 return EOF-1;
194 } else { // a dead end accept state is found
195 len = cursor - tok;
196 save_char = *cursor;
197 *cursor = '\0';
198 anchored = cursor[-1] == '\n';
199 return -r-2;
204 /////////////////////////////////////////////////////////////////////////////
205 // This is similar to above except that we don't do any backtracking: i.e.
206 // we return the first rule that matches rather than the first rule that
207 // matches the longest. The scanning algorithm is simpler and faster
208 // with this routine. The lexer should be generated without using the
209 // Backtracking option.
210 /////////////////////////////////////////////////////////////////////////////
211 inline Lexer::Rule LexScanner::fast_advance
212 (std::istream& stream, int read_size, LexScanner::Option option)
213 { register State s = current_context * 2 + (anchored ? 2 : 1);
214 tok = cursor;
215 if (save_char >= 0) *cursor = save_char;
216 for (;;) {
217 if (cursor == read_limit)
218 if (fill_buffer(stream, read_size, option) == 0) {
219 if (tok == read_limit) { // end of file
220 tok = 0; len = 0; return EOF;
221 } else { // error
222 tok = 0; len = 0; return EOF-1;
225 register unsigned char c = (unsigned char)*cursor++;
226 if ((s = go (s, c)) == 0) { // error state??
227 tok = 0; len = 0; // nothing possible
228 return EOF-1;
230 register Rule r = accept(s);
231 if (r > 0) {
232 len = cursor - tok;
233 save_char = *cursor;
234 *cursor = '\0';
235 anchored = cursor[-1] == '\n';
236 return r-1;
241 #endif