1 //////////////////////////////////////////////////////////////////////////////
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef prop_Aho_Corasick_string_matcher_generator_h
26 #define prop_Aho_Corasick_string_matcher_generator_h
28 //////////////////////////////////////////////////////////////////////////////
29 // The Aho-Corasick keyword matching algorithm\cite{AC}. The resulting
30 // automaton is represented as a sparse DFA table. This data structure
31 // makes it possible to use this algorithm for matching large sets
32 // of keys with large character sets while utilizing memory efficiently.
33 //////////////////////////////////////////////////////////////////////////////
35 #include <AD/automata/sparsdfa.h> // sparse compressed dfa
36 #include <AD/tries/briandai.h>
38 class ACGen
: public SparseDFA
{
40 ACGen(const ACGen
&); // no copy constructor
41 void operator = (const ACGen
&); // no assignment
45 ///////////////////////////////////////////////////////////////////////////
46 // Make inherited types visible
47 ///////////////////////////////////////////////////////////////////////////
48 typedef SparseDFA Super
;
49 typedef Super::Rule Rule
;
50 typedef Super::State State
;
51 typedef Super::Symbol Symbol
;
52 typedef Super::Offset Offset
;
53 typedef Briandais::Node Node
;
55 enum ACGen_constants
{ Empty_Rule
= -1 };
59 ///////////////////////////////////////////////////////////////////////////
61 ///////////////////////////////////////////////////////////////////////////
62 Briandais
* trie
; // De la Briandais trie
63 int number_of_states
; // number of states within the automata
64 State
* epsilon
; // the failure function computes epsilon transitions
67 void generate_goto(Node
, int, Symbol
[], State
[]);
71 ///////////////////////////////////////////////////////////////////////////
72 // Constructor and destructor
73 ///////////////////////////////////////////////////////////////////////////
77 ///////////////////////////////////////////////////////////////////////////
78 // Return the number of states
79 ///////////////////////////////////////////////////////////////////////////
80 int size() const { return number_of_states
; }
82 ///////////////////////////////////////////////////////////////////////////
83 // Methods to build the compressed tables
84 ///////////////////////////////////////////////////////////////////////////
85 virtual void start (Symbol min
= Max_Symbol
, Symbol max
= Min_Symbol
);
86 virtual void add_string (Rule rule
, int length
, const Symbol string
[]);
87 virtual void finish ();
89 ///////////////////////////////////////////////////////////////////////////
90 // Methods to emit C++ code
91 ///////////////////////////////////////////////////////////////////////////
92 virtual ostream
& gen_code(ostream
&, const char []) const;
93 void gen_fast_automaton(ostream
&, const char []) const;
95 ///////////////////////////////////////////////////////////////////////////
96 // Return the start state
97 ///////////////////////////////////////////////////////////////////////////
98 State
start_state() const { return 1; }
100 ///////////////////////////////////////////////////////////////////////////
101 // Advance one state within the Aho-Corasick automaton.
102 ///////////////////////////////////////////////////////////////////////////
103 inline State
go (register State s
, register Symbol c
) const
104 { register Offset offset
;
105 while (check
[offset
= base
[s
] + c
] != s
) s
= epsilon
[s
];