1 //////////////////////////////////////////////////////////////////////////////
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
23 //////////////////////////////////////////////////////////////////////////////
29 #include <AD/automata/lexergen.h> // Lexical scanner generator
30 #include <AD/automata/nfa.h> // NFAs
31 #include <AD/automata/nfa_node.h> // NFA nodes
32 #include <AD/automata/gentable.h> // Table emitter
33 #include <AD/strings/charesc.h> // Escape sequence parsing
34 #include <AD/memory/mempool.h> // Memory pools
35 #include <AD/hash/dchash.h> // Direct chaining hash table
36 #include <AD/contain/vararray.h> // Generic arrays
37 #include <AD/contain/bitset.h> // bit set
38 #include <AD/contain/fbitset.h> // fast bit set
40 ////////////////////////////////////////////////////////////////////////////
41 // Make hidden types visible
42 ////////////////////////////////////////////////////////////////////////////
43 typedef LexerGen::Symbol Symbol
;
44 typedef LexerGen::State State
;
45 typedef LexerGen::Rule Rule
;
46 typedef LexerGen::Options Options
;
48 ////////////////////////////////////////////////////////////////////////////
49 // Constructor and destructor
50 ////////////////////////////////////////////////////////////////////////////
52 : bytes_used(0), bad_rule(-1), error_msg(0), rule(0), equiv_classes(0) {}
53 LexerGen::~LexerGen() { delete [] rule
; delete [] equiv_classes
; }
55 ////////////////////////////////////////////////////////////////////////////
56 // To create a set of tables
57 ////////////////////////////////////////////////////////////////////////////
58 void LexerGen::create_tables(int size
, int states
, Symbol min
, Symbol max
)
59 { Super::create_tables(size
,states
,min
,max
);
60 rule
= new Rule
[states
];
63 ////////////////////////////////////////////////////////////////////////////
64 // To grow the number of states
65 ////////////////////////////////////////////////////////////////////////////
66 void LexerGen::grow_states(int increment
)
67 { Rule
* new_rule
= new Rule
[ number_of_states
+ increment
];
68 memcpy(new_rule
,rule
,sizeof(Rule
) * number_of_states
);
71 Super::grow_states(increment
);
74 ////////////////////////////////////////////////////////////////////////////
75 // The main entry point of the lexical scanner compiler
76 ////////////////////////////////////////////////////////////////////////////
77 void LexerGen::compile
78 ( int n
, const char * const * regexps
, const char * const * contexts
,
79 int max_ch
, int options
82 ////////////////////////////////////////////////////////////////////////
83 // Here are the tables and maps that we'll need.
84 ////////////////////////////////////////////////////////////////////////
85 MemPool
mem(4096); // Memory pool with page size of 4K
86 DCHashTable
<FastBitSet
*,State
> dstates(1023,1.0);
87 // Map sets of nfa states to dfa states
88 VarArray
<FastBitSet
*> nstates
; // Inverse of above
89 VarArray
<Rule
> accept_rule
; // Accept rule of state, if any
90 NFA
nfa(mem
); // create the nfa
92 ////////////////////////////////////////////////////////////////////////
93 // Compile the regexps into an NFA
94 ////////////////////////////////////////////////////////////////////////
95 nfa
.compile(n
, regexps
, contexts
, max_ch
, options
& CaseInsensitive
);
97 error_msg
= nfa
.error_msg();
100 ////////////////////////////////////////////////////////////////////////
101 // Compile the equivalence class table.
102 ////////////////////////////////////////////////////////////////////////
104 equiv_classes
= nfa
.get_equiv_classes();
106 ////////////////////////////////////////////////////////////////////////
107 // Start generating the tables.
108 ////////////////////////////////////////////////////////////////////////
109 create_tables(nfa
.state_count() * nfa
.equiv_class_count(),
110 nfa
.state_count(), 0, nfa
.equiv_class_count() - 1);
113 ////////////////////////////////////////////////////////////////////////
114 // Create an error state. This state always jams
115 ////////////////////////////////////////////////////////////////////////
116 { FastBitSet
* err
= new (mem
) FastBitSet(mem
, nfa
.state_count());
117 dstates
.insert(err
,(State
)0);
119 rule
[0] = (options
& Backtracking
) ? -1 : 0;
122 ////////////////////////////////////////////////////////////////////////
123 // Create two start states for each context.
124 ////////////////////////////////////////////////////////////////////////
125 { for (State s
= 1; s
<= nfa
.context_count() * 2 + 2; s
++)
126 { NFA_Node
* theNFA
= nfa
.context_nfa(s
-1);
131 FastBitSet
* set
= new (mem
) FastBitSet(mem
, nfa
.state_count());
132 theNFA
->closure(set
);
133 accept_rule
[s
] = NFA_Node::accept_thinning(set
,n
);
134 dstates
.insert(set
,s
);
140 ////////////////////////////////////////////////////////////////////////
141 // Out going transitions indexed by equiv classes.
142 ////////////////////////////////////////////////////////////////////////
143 FastBitSet
** trans
=
144 (FastBitSet
**)mem
.m_alloc
145 (sizeof(FastBitSet
*) * nfa
.equiv_class_count());
146 State
* delta
= // out going state
147 (State
*)mem
.c_alloc(sizeof(State
) * nfa
.equiv_class_count());
148 { for (int i
= nfa
.equiv_class_count() - 1; i
>= 0; i
--)
149 trans
[i
] = new (mem
) FastBitSet(mem
, nfa
.state_count());
152 //{ for (int i = 0; i < nfa.state_count(); i++)
153 // if (nfa[i]) cerr << "State " << i << " = " << *nfa[i]->delta() << '\n';
156 ////////////////////////////////////////////////////////////////////////
157 // Compute the rest of the states
158 ////////////////////////////////////////////////////////////////////////
159 State number_of_dfa_states
= nfa
.context_count() * 2 + 3;
160 for (State s
= 1; s
< number_of_dfa_states
; s
++)
161 { register FastBitSet
* set
= nstates
[s
];
162 if (set
== 0) continue;
163 Bool is_jammed
= true; // assume no out transition for now.
165 /////////////////////////////////////////////////////////////////////
166 // Compute transitions from this state
167 /////////////////////////////////////////////////////////////////////
168 { FastBitSet::Iter
iter(*set
);
170 { register int i
= iter
.next();
172 NFA_Delta
* node
= nfa
[i
];
173 if (node
) // delta node
174 { Symbol c
= node
->symbol();
176 { node
->add_delta(trans
[equiv_classes
[c
]]);
179 { const FastBitSet
& chars
= *nfa
.char_class_map(c
);
181 for (ch
= nfa
.equiv_class_count() - 1; ch
>= 0; ch
--)
182 { if (chars
[ch
]) node
->add_delta(trans
[ch
]); }
188 /////////////////////////////////////////////////////////////////////
190 /////////////////////////////////////////////////////////////////////
191 for (int i
= nfa
.equiv_class_count() - 1; i
>= 0; i
--)
192 { Ix d
= dstates
.lookup(trans
[i
]);
195 delta
[i
] = dstates
.value(d
);
196 if (delta
[i
] != 0) is_jammed
= false;
199 { // create a new state
200 Rule r
= NFA_Node::accept_thinning(trans
[i
],n
);
201 dstates
.insert(trans
[i
], number_of_dfa_states
);
202 nstates
[number_of_dfa_states
] = trans
[i
];
203 accept_rule
[number_of_dfa_states
] = r
;
204 trans
[i
] = new (mem
) FastBitSet(mem
, nfa
.state_count());
205 delta
[i
] = number_of_dfa_states
++;
207 // cerr << "State " << s << " = " << *set
208 // << " n = " << n << " rule = " << r << "\n";
212 /////////////////////////////////////////////////////////////////////
213 // insert transition into table.
214 /////////////////////////////////////////////////////////////////////
217 /////////////////////////////////////////////////////////////////////
218 // The rule table actually contains information concerning whether
219 // a state has any out going states.
220 /////////////////////////////////////////////////////////////////////
221 if ((options
& Backtracking
) && is_jammed
)
222 rule
[s
] = -accept_rule
[s
]-1;
224 rule
[s
] = accept_rule
[s
];
227 ////////////////////////////////////////////////////////////////////////
228 // Finish the table compression
229 ////////////////////////////////////////////////////////////////////////
232 ////////////////////////////////////////////////////////////////////////
233 // Compute statistics
234 ////////////////////////////////////////////////////////////////////////
235 bytes_used
= mem
.bytes_used();
236 equiv_class_count
= nfa
.equiv_class_count();
239 ////////////////////////////////////////////////////////////////////////////
241 ////////////////////////////////////////////////////////////////////////////
242 ostream
& LexerGen::gen_code(ostream
& out
, const char name
[]) const
244 Super::gen_code(out
,name
);
246 pr
.print(out
, (const char *)equiv_classes
,
247 max_char
+ 1, sizeof(Symbol
),
248 "static const unsigned char ", name
, "equiv_classes", true);
249 pr
.print(out
, (const char *)rule
,
250 max_state
+ 1, sizeof(Rule
),
251 "static const DFATables::Rule ", name
, "accept_rule");
256 ////////////////////////////////////////////////////////////////////////////
257 // Method to generate a report.
258 ////////////////////////////////////////////////////////////////////////////
259 ostream
& LexerGen::print_report(ostream
& log
) const
260 { log
<< "Lexer statistics:"
261 "\nNumber of states = " << (max_state
+ 1)
262 << "\nNumber of next/check entries = " << (max_check
+ 1)
263 << "\nNumber of equivalence character classes = " << equiv_class_count
265 << ((bytes_used
+ 512)/1024) << "(k)\n\n";