initial
[prop.git] / include / AD / automata / acgen.h
blob32f507fad804c18c261e177b2d9ff3d3a5b8eed3
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
21 // Allen Leung
22 // 1994
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef prop_Aho_Corasick_string_matcher_generator_h
26 #define prop_Aho_Corasick_string_matcher_generator_h
28 //////////////////////////////////////////////////////////////////////////////
29 // The Aho-Corasick keyword matching algorithm\cite{AC}. The resulting
30 // automaton is represented as a sparse DFA table. This data structure
31 // makes it possible to use this algorithm for matching large sets
32 // of keys with large character sets while utilizing memory efficiently.
33 //////////////////////////////////////////////////////////////////////////////
35 #include <AD/automata/sparsdfa.h> // sparse compressed dfa
36 #include <AD/tries/briandai.h>
38 class ACGen : public SparseDFA {
40 ACGen(const ACGen&); // no copy constructor
41 void operator = (const ACGen&); // no assignment
43 public:
45 ///////////////////////////////////////////////////////////////////////////
46 // Make inherited types visible
47 ///////////////////////////////////////////////////////////////////////////
48 typedef SparseDFA Super;
49 typedef Super::Rule Rule;
50 typedef Super::State State;
51 typedef Super::Symbol Symbol;
52 typedef Super::Offset Offset;
53 typedef Briandais::Node Node;
55 enum ACGen_constants { Empty_Rule = -1 };
57 protected:
59 ///////////////////////////////////////////////////////////////////////////
60 // Internals
61 ///////////////////////////////////////////////////////////////////////////
62 Briandais * trie; // De la Briandais trie
63 int number_of_states; // number of states within the automata
64 State * epsilon; // the failure function computes epsilon transitions
65 Rule * matched_rule;
67 void generate_goto(Node, int, Symbol [], State []);
69 public:
71 ///////////////////////////////////////////////////////////////////////////
72 // Constructor and destructor
73 ///////////////////////////////////////////////////////////////////////////
74 ACGen();
75 ~ACGen();
77 ///////////////////////////////////////////////////////////////////////////
78 // Return the number of states
79 ///////////////////////////////////////////////////////////////////////////
80 int size() const { return number_of_states; }
82 ///////////////////////////////////////////////////////////////////////////
83 // Methods to build the compressed tables
84 ///////////////////////////////////////////////////////////////////////////
85 virtual void start (Symbol min = Max_Symbol, Symbol max = Min_Symbol);
86 virtual void add_string (Rule rule, int length, const Symbol string []);
87 virtual void finish ();
89 ///////////////////////////////////////////////////////////////////////////
90 // Methods to emit C++ code
91 ///////////////////////////////////////////////////////////////////////////
92 virtual ostream& gen_code(ostream&, const char []) const;
93 void gen_fast_automaton(ostream&, const char []) const;
95 ///////////////////////////////////////////////////////////////////////////
96 // Return the start state
97 ///////////////////////////////////////////////////////////////////////////
98 State start_state() const { return 1; }
100 ///////////////////////////////////////////////////////////////////////////
101 // Advance one state within the Aho-Corasick automaton.
102 ///////////////////////////////////////////////////////////////////////////
103 inline State go (register State s, register Symbol c) const
104 { register Offset offset;
105 while (check[offset = base[s] + c] != s) s = epsilon[s];
106 return next[offset];
110 #endif