initial
[prop.git] / include / AD / automata / lexerbuf.h
blob76101f8f760c96dcc722ea86efc4bc2f1239ee94
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are welcomed to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome(read crave for)
19 // any suggestions and help from the users.
21 // Allen Leung (leunga@cs.nyu.edu)
22 // 1994-1995
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef lexer_buffer_h
26 #define lexer_buffer_h
28 #include <stdlib.h>
29 #include <AD/generic/generic.h> // generic definitions
30 #include <AD/automata/dfatable.h> // automata tables
31 #include <AD/automata/regexmat.h> // regular expression string matcher
33 //////////////////////////////////////////////////////////////////////////////
34 // This class implements a backtrackable buffer for implementing
35 // scanners.
36 //////////////////////////////////////////////////////////////////////////////
37 class LexerBuffer : public DFATables {
38 public:
39 ///////////////////////////////////////////////////////////////////////////
40 // Type definitions
41 ///////////////////////////////////////////////////////////////////////////
42 typedef DFATables Super;
43 typedef Super::Symbol Symbol;
44 typedef Super::State State;
45 typedef Super::Offset Offset;
46 typedef Super::Rule Rule;
48 protected:
49 ///////////////////////////////////////////////////////////////////////////
50 // Internal buffer and other members
51 ///////////////////////////////////////////////////////////////////////////
52 friend class RegexMatch;
53 char * buffer; // buffer
54 char * buffer_limit; // end of the buffer
55 char * cursor; // pointer to current token
56 char * cursor_limit; // pointer to the end of the token
57 Bool anchored; // are we at the beginning of the line.
58 Bool pinned; // is the buffer pinned?
59 Bool more_input; // has more input?
60 int saved_char; // saved last character from last token.
61 int current_context; // current context number
63 void init();
65 public:
66 ///////////////////////////////////////////////////////////////////////////
67 // Constructor and destructor
68 ///////////////////////////////////////////////////////////////////////////
69 LexerBuffer();
70 LexerBuffer(char *);
71 LexerBuffer(char *, size_t);
72 virtual ~LexerBuffer();
74 ///////////////////////////////////////////////////////////////////////////
75 // Some selectors
76 ///////////////////////////////////////////////////////////////////////////
77 inline int capacity () const { return buffer_limit - buffer; }
78 inline int length () const { return cursor_limit - cursor; }
79 inline operator const char * () const { return cursor; }
80 inline operator char * () { return cursor; }
81 inline int context () const { return current_context; }
82 inline Bool is_anchored() const { return anchored; }
83 inline const char * text () const { return cursor; }
84 inline char * text () { return cursor; }
85 inline char operator [] (int i) const { return cursor[i]; }
86 inline char& operator [] (int i) { return cursor[i]; }
87 inline int lookahead () const { return saved_char; }
89 ///////////////////////////////////////////////////////////////////////////
90 // Mutators
91 ///////////////////////////////////////////////////////////////////////////
92 virtual void reset();
93 virtual void set_buffer (char *, size_t);
94 void set_buffer (char *);
95 inline void set_anchored(Bool a = true) { anchored = a; }
96 inline void set_context (int c = 0) { current_context = c; }
97 inline void push_back (int n)
98 { if (saved_char >= 0) *cursor_limit = saved_char;
99 cursor_limit -= n;
100 saved_char = -1;
104 protected:
105 ///////////////////////////////////////////////////////////////////////////
106 // Default method for filling the buffer.
107 // This should be redefined in derive classes
108 ///////////////////////////////////////////////////////////////////////////
109 virtual size_t fill_buffer();
110 virtual void end_of_file();
111 virtual void error(const char * start, const char * stop);
114 //////////////////////////////////////////////////////////////////////////////
115 // Tokenization method
116 //////////////////////////////////////////////////////////////////////////////
117 inline LexerBuffer::Rule RegexMatch::MatchText
118 (State, LexerBuffer& B, const char *) const
119 { const char * nxt;
120 // restore the saved character
121 if (B.saved_char >= 0) *B.cursor_limit = B.saved_char;
122 // Locate the next token
123 for (;;) {
124 Rule r = MatchText (B.current_context*2 + (B.anchored ? 2 : 1),
125 B.cursor_limit, B.buffer_limit - B.cursor_limit,
126 nxt, B.more_input);
127 if (r == 0) {
128 // No match, invoke the error handler
129 B.error (B.cursor_limit, nxt);
130 return -1;
131 } else if (r == -1) {
132 // Out of buffer, invoke the fill buffer handler
133 size_t len = B.fill_buffer();
134 if (len == 0) return -1;
135 } else {
136 // Got a rule.
137 // Compute info
138 B.cursor = B.cursor_limit;
139 B.cursor_limit = (char*)nxt;
140 B.anchored = B.cursor_limit[-1] == '\n';
141 B.saved_char = *B.cursor_limit;
142 B.cursor_limit[0] = '\0';
143 return r;
148 #endif