1 //////////////////////////////////////////////////////////////////////////////
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are welcomed to incorporate any part of ADLib and Prop into
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
18 // This software is still under development and we welcome(read crave for)
19 // any suggestions and help from the users.
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef DFA_based_regular_expression_string_matcher_h
26 #define DFA_based_regular_expression_string_matcher_h
28 #include <AD/generic/generic.h>
29 #include <AD/automata/lexer.h>
31 //////////////////////////////////////////////////////////////////////////////
32 // Class |RegexMatch| implements a dfa based regular expression
34 //////////////////////////////////////////////////////////////////////////////
35 class RegexMatch
: public Lexer
{
37 RegexMatch(const RegexMatch
&); // no copy constructor
38 void operator = (const RegexMatch
&); // no assignment
42 ///////////////////////////////////////////////////////////////////////////
44 ///////////////////////////////////////////////////////////////////////////
46 typedef Super::Symbol Symbol
;
47 typedef Super::State State
;
48 typedef Super::Offset Offset
;
49 typedef Super::Rule Rule
;
53 ///////////////////////////////////////////////////////////////////////////
55 ///////////////////////////////////////////////////////////////////////////
56 RegexMatch( const Offset base_table
[],
57 const State check_table
[],
58 const State def_table
[],
59 const State next_table
[],
60 const Rule rule_table
[],
61 const unsigned char equiv_table
[]
63 : Lexer(base_table
, check_table
, def_table
, next_table
,
64 rule_table
, equiv_table
) {}
66 ///////////////////////////////////////////////////////////////////////////
67 // String matching operations.
68 // Returns the matching rule number (which is compiled in the tables).
69 // If more than one rule matches that the highest priority rule is
70 // returned. Rules are numbered from 1 - n.
72 // If nothing matches then 0 is returned.
73 ///////////////////////////////////////////////////////////////////////////
74 inline Rule
MatchText(const char *) const;
75 inline Rule
MatchText(const char *, int) const;
76 inline Rule
MatchText(const char *, const char *&) const;
77 inline Rule
MatchText(const char *, int, const char *&) const;
78 inline Rule
MatchText(State
, const char *, const char *&) const;
79 inline Rule
MatchText(State
, const char *, int, const char *&, Bool
= false) const;
80 Rule
MatchText(State
, class LexerBuffer
&, const char *) const;
83 //////////////////////////////////////////////////////////////////////////////
84 // Matching operation on strings. The entire string is scanned and no
85 // backtracking is performed. The string must be null-terminated.
86 //////////////////////////////////////////////////////////////////////////////
87 inline RegexMatch::Rule
RegexMatch::MatchText(register const char * string
) const
88 { register State state
;
89 register unsigned char c
;
90 for (state
= start_state
; (c
= *string
) != '\0'; string
++)
91 if ((state
= go(state
,c
)) == 0) return 0;
95 //////////////////////////////////////////////////////////////////////////////
96 // Matching operations on strings. This is the same as the previous
97 // version except that an extra length parameters is used. Nul's may
98 // be embedded in the string.
99 //////////////////////////////////////////////////////////////////////////////
100 inline RegexMatch::Rule
RegexMatch::MatchText
101 (register const char * string
, register int len
) const
102 { register State state
;
103 for (state
= start_state
; len
> 0; len
--) {
104 register unsigned char c
= *string
++;
105 if ((state
= go(state
,c
)) == 0) return 0;
107 return accept(state
);
110 //////////////////////////////////////////////////////////////////////////////
111 // Matching operation on string.
112 // This version assumes backtracking is used. We'll only try to Match
113 // the longest substring. A pointer to the rest of the string is returned.
114 // The user must also supply a start state.
116 // The special code -1 is returned if the buffer space has run out
117 // without reaching the end of the stream.
119 // Caution: the tables must be generated with the backtracking option.
120 //////////////////////////////////////////////////////////////////////////////
121 inline RegexMatch::Rule
RegexMatch::MatchText
122 (register State state
, register const char * s
, const char *& nxt
) const
123 { register unsigned char c
;
125 const char * last_position
;
127 { register Rule r
= accept(state
= go(state
,c
= *s
++));
128 if (r
> 0) { // state is backtrackable
129 last_rule
= r
; last_position
= s
;
130 } else if (r
== -1) { // dead end state
131 // try to backtrack if possible
133 // we have a last rule and position, just use it dude!
137 // problem, return 0 for error
138 // next is set to the error position.
142 } else if (r
< -1) { // last posible accept state
143 // this is the last possible position
149 // we have run out of buffer
150 // return -1 to signal this condition
154 //////////////////////////////////////////////////////////////////////////////
155 // The version assumes that the buffer length has been explicitly
157 //////////////////////////////////////////////////////////////////////////////
158 inline RegexMatch::Rule
RegexMatch::MatchText
167 const char * last_position
;
175 { state
= go(state
,*s
++);
178 if (r
> 0) { // state is backtrackable
179 last_rule
= r
; last_position
= s
; --len
;
180 } else if (r
== -1) { // dead end state
182 // try to back track if possible
184 // we have a last rule and position, just use it dude!
188 // problem, return 0 for error
189 // n is set to the error position.
193 } else if (r
< -1) { // last posible accept state
194 // this is the last possible position
200 // we have run out of buffer
201 // return -1 to signal this condition
205 //////////////////////////////////////////////////////////////////////////////
206 // These version assumes the default start state should be used.
207 //////////////////////////////////////////////////////////////////////////////
208 inline RegexMatch::Rule
RegexMatch::MatchText(const char * s
, const char *& n
) const
209 { return MatchText (start_state
, s
, n
); }
210 inline RegexMatch::Rule
RegexMatch::MatchText(const char * s
, int len
, const char *& n
) const
211 { return MatchText (start_state
, s
, len
, n
); }