1 //===- DFAEmitter.cpp - Finite state automaton emitter --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This class can produce a generic deterministic finite state automaton (DFA),
10 // given a set of possible states and transitions.
12 // The input transitions can be nondeterministic - this class will produce the
13 // deterministic equivalent state machine.
15 // The generated code can run the DFA and produce an accepted / not accepted
16 // state and also produce, given a sequence of transitions that results in an
17 // accepted state, the sequence of intermediate states. This is useful if the
18 // initial automaton was nondeterministic - it allows mapping back from the DFA
// to the NFA.
21 //===----------------------------------------------------------------------===//
22 #define DEBUG_TYPE "dfa-emitter"
24 #include "DFAEmitter.h"
25 #include "CodeGenTarget.h"
26 #include "SequenceToOffsetTable.h"
27 #include "TableGenBackends.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/UniqueVector.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
44 //===----------------------------------------------------------------------===//
45 // DfaEmitter implementation. This is independent of the GenAutomaton backend.
46 //===----------------------------------------------------------------------===//
/// Record one NFA transition: taking action A in state From may lead to To.
/// The statements visible here register From in NfaStates and append To to the
/// list of targets kept in NfaTransitions under the key {From, A}.
// NOTE(review): some lines of this function (including its closing brace)
// appear to be missing from this view - confirm against the complete file.
48 void DfaEmitter::addTransition(state_type From
, state_type To
, action_type A
) {
// Remember the source state.
50 NfaStates
.insert(From
);
// Append To to the states reachable from From on action A.
52 NfaTransitions
[{From
, A
}].push_back(To
);
/// Expand one DFA state. For every action, gather the NFA states reachable
/// from any NFA state contained in DS, register that set as a DFA state and
/// record the DFA transition together with the NFA state pairs behind it.
///
/// DS is taken by value - presumably because DfaStates.insert() below may grow
/// the container the caller's state lives in; confirm before changing this.
// NOTE(review): several lines are missing from this view (the bodies of the
// two `if` statements, the declarations of NewStates and TI, closing braces),
// so the comments below only describe the statements that are visible.
56 void DfaEmitter::visitDfaState(DfaState DS
) {
57 // For every possible action...
// Id of DS inside DfaStates; it is the source of every transition recorded
// below.
58 auto FromId
= DfaStates
.idFor(DS
);
59 for (action_type A
: Actions
) {
62 // For every represented state, word pair in the original NFA...
63 for (state_type
&FromState
: DS
) {
64 // If this action is possible from this state add the transitioned-to
65 // states to NewStates.
66 auto I
= NfaTransitions
.find({FromState
, A
});
// No NFA transition exists for {FromState, A} (handling not visible here).
67 if (I
== NfaTransitions
.end())
// Collect every NFA target and remember the (from, to) pair that produced it.
69 for (state_type
&ToState
: I
->second
) {
70 NewStates
.push_back(ToState
);
71 TI
.emplace_back(FromState
, ToState
);
// Action A leads nowhere from this DFA state (handling not visible here).
74 if (NewStates
.empty())
// Drop adjacent duplicates. std::unique only removes neighbours, so this
// relies on both vectors being sorted first - NOTE(review): the sort is not
// visible in this view; confirm it exists.
78 NewStates
.erase(std::unique(NewStates
.begin(), NewStates
.end()),
81 TI
.erase(std::unique(TI
.begin(), TI
.end()), TI
.end());
// Register the reachable set as a DFA state and take the id returned for it.
82 unsigned ToId
= DfaStates
.insert(NewStates
);
// Key: (source DFA state, action). Value: (target DFA state, NFA pairs).
83 DfaTransitions
.emplace(std::make_pair(FromId
, A
), std::make_pair(ToId
, TI
));
87 void DfaEmitter::constructDfa() {
88 DfaState
Initial(1, /*NFA initial state=*/0);
89 DfaStates
.insert(Initial
);
91 // Note that UniqueVector starts indices at 1, not zero.
92 unsigned DfaStateId
= 1;
93 while (DfaStateId
<= DfaStates
.size())
94 visitDfaState(DfaStates
[DfaStateId
++]);
/// Write the generated tables for the automaton called Name to OS.
///
/// The visible statements emit, in order: two summary comments with the NFA
/// and DFA sizes, the `<Name>TransitionInfo` array of NFA state pairs, the
/// `<Name>Transition` struct definition and the `<Name>Transitions` table with
/// one row per DFA transition.
// NOTE(review): a number of lines of this function are missing from this view
// (for example the call that consumes the pair-printing lambda and the closing
// lines of each emitted construct); the comments describe only what is visible.
97 void DfaEmitter::emit(StringRef Name
, raw_ostream
&OS
) {
// Size summary of the input NFA and of the generated DFA, as output comments.
100 OS
<< "// Input NFA has " << NfaStates
.size() << " states with "
101 << NumNfaTransitions
<< " transitions.\n";
102 OS
<< "// Generated DFA has " << DfaStates
.size() << " states with "
103 << DfaTransitions
.size() << " transitions.\n\n";
105 // Implementation note: We don't bake a simple std::pair<> here as it requires
106 // significantly more effort to parse. A simple test with a large array of
107 // struct-pairs (N=100000) took clang-10 6s to parse. The same array of
108 // std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to
109 // define the pair type.
111 // FIXME: It may make sense to emit these as ULEB sequences instead of
112 // pairs of uint64_t.
113 OS
<< "// A zero-terminated sequence of NFA state transitions. Every DFA\n";
114 OS
<< "// transition implies a set of NFA transitions. These are referred\n";
115 OS
<< "// to by index in " << Name
<< "Transitions[].\n";
// Gather the NFA pair sequence of every DFA transition in one table; each
// transition later refers to its sequence through the index Table.get() yields.
117 SequenceToOffsetTable
<DfaTransitionInfo
> Table
;
// NOTE(review): EmittedIndices is not referenced by any statement visible in
// this view - confirm whether it is still needed.
118 std::map
<DfaTransitionInfo
, unsigned> EmittedIndices
;
119 for (auto &T
: DfaTransitions
)
120 Table
.add(T
.second
.second
);
// Header of the emitted pair array; NfaStatePair is expected to be defined by
// the user of the generated code (see the implementation note above).
122 OS
<< "std::array<NfaStatePair, " << Table
.size() << "> " << Name
123 << "TransitionInfo = {{\n";
// Callback that prints one NFA state pair as "{first, second}".
126 [](raw_ostream
&OS
, std::pair
<uint64_t, uint64_t> P
) {
127 OS
<< "{" << P
.first
<< ", " << P
.second
<< "}";
// Definition of the per-transition row type used by the table below.
133 OS
<< "// A transition in the generated " << Name
<< " DFA.\n";
134 OS
<< "struct " << Name
<< "Transition {\n";
135 OS
<< " unsigned FromDfaState; // The transitioned-from DFA state.\n";
138 OS
<< " Action; // The input symbol that causes this transition.\n";
139 OS
<< " unsigned ToDfaState; // The transitioned-to DFA state.\n";
140 OS
<< " unsigned InfoIdx; // Start index into " << Name
141 << "TransitionInfo.\n";
// The transition table itself: one row per entry of DfaTransitions.
144 OS
<< "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";
145 OS
<< "// The initial state is 1, not zero.\n";
146 OS
<< "std::array<" << Name
<< "Transition, " << DfaTransitions
.size() << "> "
147 << Name
<< "Transitions = {{\n";
148 for (auto &KV
: DfaTransitions
) {
// KV.first is (source DFA state, action); KV.second is (target DFA state,
// NFA pair sequence).
149 dfa_state_type From
= KV
.first
.first
;
150 dfa_state_type To
= KV
.second
.first
;
151 action_type A
= KV
.first
.second
;
// Index of this transition's pair sequence inside the shared table.
152 unsigned InfoIdx
= Table
.get(KV
.second
.second
);
// Emit the row; the action goes through printActionValue() so that a
// subclass can control how it is printed.
153 OS
<< " {" << From
<< ", ";
154 printActionValue(A
, OS
);
155 OS
<< ", " << To
<< ", " << InfoIdx
<< "},\n";
160 void DfaEmitter::printActionType(raw_ostream
&OS
) { OS
<< "uint64_t"; }
162 void DfaEmitter::printActionValue(action_type A
, raw_ostream
&OS
) { OS
<< A
; }
164 //===----------------------------------------------------------------------===//
165 // AutomatonEmitter implementation
166 //===----------------------------------------------------------------------===//
169 // FIXME: This entire discriminated union could be removed with c++17:
170 // using Action = std::variant<Record *, unsigned, std::string>;
// The string alternative of the action; left empty when the action is not a
// string. It has to be default-constructed: initializing a std::string from
// nullptr is undefined behavior (and ill-formed since C++23).
std::string S;
177 Action(Record
*R
, unsigned I
, std::string S
) : R(R
), I(I
), S(S
) {}
/// Print this action to OS as it should appear in the generated table.
// NOTE(review): only the string case is visible in this view; the lines that
// select between the alternatives are missing - confirm against the full file.
179 void print(raw_ostream
&OS
) const {
// String actions are written wrapped in double quotes.
183 OS
<< '"' << S
<< '"';
187 bool operator<(const Action
&Other
) const {
188 return std::make_tuple(R
, I
, S
) <
189 std::make_tuple(Other
.R
, Other
.I
, Other
.S
);
/// The ordered list of Action values attached to a single transition.
193 using ActionTuple
= std::vector
<Action
>;
// NOTE(review): the class header and some member declarations are missing
// from this view; the member the next comment describes is one of them
// (getActions() below returns a member named Actions).
198 // The tuple of actions that causes this transition.
200 // The types of the actions; this is the same across all transitions.
201 SmallVector
<std::string
, 4> Types
;
/// Build a transition from record R. Parent supplies the names of the action
/// symbol fields and any per-symbol type overrides.
204 Transition(Record
*R
, Automaton
*Parent
);
/// The actions of this transition, returned by reference.
205 const ActionTuple
&getActions() { return Actions
; }
/// The type names of the actions. Returns a copy of Types.
206 SmallVector
<std::string
, 4> getTypes() { return Types
; }
/// Whether this transition may be taken from State.
208 bool canTransitionFrom(uint64_t State
);
/// The state reached by taking this transition from State.
209 uint64_t transitionFrom(uint64_t State
);
// NOTE(review): the class header and some member declarations are missing
// from this view (the member functions elsewhere in the file also use a
// member named R).
/// All records; used to look up the transition definitions.
213 RecordKeeper
&Records
;
/// Every transition of this automaton, filled in by emit().
215 std::vector
<Transition
> Transitions
;
216 /// All possible action tuples, uniqued.
217 UniqueVector
<ActionTuple
> Actions
;
218 /// The fields within each Transition object to find the action symbols.
219 std::vector
<StringRef
> ActionSymbolFields
;
/// Bind the automaton to its defining record R.
222 Automaton(RecordKeeper
&Records
, Record
*R
);
/// Write the generated code for this automaton to OS.
223 void emit(raw_ostream
&OS
);
/// Names of the record fields that hold the action symbols of a transition.
225 ArrayRef
<StringRef
> getActionSymbolFields() { return ActionSymbolFields
; }
226 /// If the type of action A has been overridden (there exists a field
227 /// "TypeOf_A") return that, otherwise return the empty string.
228 StringRef
getActionSymbolType(StringRef A
);
/// Driver of the backend: emits every automaton defined in the records.
// NOTE(review): the access specifier and the end of the class are missing
// from this view.
231 class AutomatonEmitter
{
/// All records handed to the backend.
232 RecordKeeper
&Records
;
/// Keep a reference to the record keeper R.
235 AutomatonEmitter(RecordKeeper
&R
) : Records(R
) {}
/// Emit all automata to OS.
236 void run(raw_ostream
&OS
);
239 /// A DfaEmitter implementation that can print our variant action type.
// NOTE(review): the access specifier and the end of the class are missing
// from this view.
240 class CustomDfaEmitter
: public DfaEmitter
{
/// The uniqued action tuples; printActionValue() indexes into it. Held by
/// reference, so the referenced vector must outlive this emitter.
241 const UniqueVector
<ActionTuple
> &Actions
;
/// Name printed by printActionType().
242 std::string TypeName
;
/// Bind the emitter to the action alphabet and copy the action type name.
245 CustomDfaEmitter(const UniqueVector
<ActionTuple
> &Actions
, StringRef TypeName
)
246 : Actions(Actions
), TypeName(TypeName
) {}
/// Print TypeName instead of the base class's default action type.
248 void printActionType(raw_ostream
&OS
) override
;
/// Print the action tuple that id A stands for.
249 void printActionValue(action_type A
, raw_ostream
&OS
) override
;
/// Process every record derived from "GenericAutomaton". The output for each
/// automaton is bracketed by `#ifdef GET_<name>_DECL` / `#endif`.
// NOTE(review): the statement between the two preprocessor lines and the
// closing braces are missing from this view.
253 void AutomatonEmitter::run(raw_ostream
&OS
) {
254 for (Record
*R
: Records
.getAllDerivedDefinitions("GenericAutomaton")) {
255 Automaton
A(Records
, R
);
// Open the guard that lets a client select this automaton's declarations.
256 OS
<< "#ifdef GET_" << R
->getName() << "_DECL\n";
// Close the guard.
258 OS
<< "#endif // GET_" << R
->getName() << "_DECL\n";
262 Automaton::Automaton(RecordKeeper
&Records
, Record
*R
)
263 : Records(Records
), R(R
) {
264 LLVM_DEBUG(dbgs() << "Emitting automaton for " << R
->getName() << "\n");
265 ActionSymbolFields
= R
->getValueAsListOfStrings("SymbolFields");
/// Emit this automaton to OS.
///
/// The visible statements (1) build a Transition for every record of the
/// automaton's "TransitionClass" and unique their action tuples, (2) explore
/// the states reachable from state 0 with a worklist, feeding each discovered
/// transition to a CustomDfaEmitter, (3) print the `<Name>Action` type alias
/// and (4) let the emitter print its tables.
// NOTE(review): a number of lines are missing from this view (e.g. the body
// of the `if` that filters transitions, the place where NumTransitions is
// updated, the `else` of the alias emission and any step between state
// exploration and Emitter.emit()); the comments describe only what is visible.
268 void Automaton::emit(raw_ostream
&OS
) {
269 StringRef TransitionClass
= R
->getValueAsString("TransitionClass");
// Materialize every transition record and collect the action alphabet.
270 for (Record
*T
: Records
.getAllDerivedDefinitions(TransitionClass
)) {
271 assert(T
->isSubClassOf("Transition"));
272 Transitions
.emplace_back(T
, this);
273 Actions
.insert(Transitions
.back().getActions());
276 LLVM_DEBUG(dbgs() << " Action alphabet cardinality: " << Actions
.size()
278 LLVM_DEBUG(dbgs() << " Each state has " << Transitions
.size()
279 << " potential transitions.\n");
281 StringRef Name
= R
->getName();
// The emitter prints actions using the type alias "<Name>Action" emitted
// further down.
283 CustomDfaEmitter
Emitter(Actions
, std::string(Name
) + "Action");
284 // Starting from the initial state, build up a list of possible states and
// Breadth-first exploration: the worklist starts with state 0, and SeenStates
// ensures each state is queued only once.
286 std::deque
<uint64_t> Worklist(1, 0);
287 std::set
<uint64_t> SeenStates
;
288 unsigned NumTransitions
= 0;
289 SeenStates
.insert(Worklist
.front());
290 while (!Worklist
.empty()) {
291 uint64_t State
= Worklist
.front();
292 Worklist
.pop_front();
293 for (Transition
&T
: Transitions
) {
// Transitions that cannot be taken from State are filtered here (the
// statement that acts on the check is not visible).
294 if (!T
.canTransitionFrom(State
))
296 uint64_t NewState
= T
.transitionFrom(State
);
// emplace().second is true only on first insertion, i.e. for unseen states.
297 if (SeenStates
.emplace(NewState
).second
)
298 Worklist
.emplace_back(NewState
);
// The action is passed to the emitter as its id within Actions.
300 Emitter
.addTransition(State
, NewState
, Actions
.idFor(T
.getActions()));
303 LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates
.size()
304 << " states with " << NumTransitions
<< " transitions.\n");
// getTypes() returns by value; binding the temporary to a const reference
// keeps it alive for the rest of the function.
// NOTE(review): Transitions.back() requires at least one transition record;
// nothing visible here guards against an empty list.
306 const auto &ActionTypes
= Transitions
.back().getTypes();
307 OS
<< "// The type of an action in the " << Name
<< " automaton.\n";
// A single action symbol uses its type directly; several are wrapped in a
// std::tuple.
308 if (ActionTypes
.size() == 1) {
309 OS
<< "using " << Name
<< "Action = " << ActionTypes
[0] << ";\n";
311 OS
<< "using " << Name
<< "Action = std::tuple<" << join(ActionTypes
, ", ")
// Print the tables for this automaton.
316 Emitter
.emit(Name
, OS
);
319 StringRef
Automaton::getActionSymbolType(StringRef A
) {
320 Twine Ty
= "TypeOf_" + A
;
321 if (!R
->getValue(Ty
.str()))
323 return R
->getValueAsString(Ty
.str());
/// Build a transition from record R.
///
/// The visible statements read the "NewState" bits of R into the NewState
/// mask, then, for every action symbol field named by Parent, append one
/// Action and one type name according to the field's TableGen type, applying
/// Parent's type override when there is one.
// NOTE(review): several lines are missing from this view (e.g. the condition
// that guards setting a bit, the `else` in front of report_fatal_error and the
// closing braces); the comments describe only what is visible.
326 Transition::Transition(Record
*R
, Automaton
*Parent
) {
327 BitsInit
*NewStateInit
= R
->getValueAsBitsInit("NewState");
// NewState is accumulated with 1ULL shifts, so at most 64 bits are supported.
329 assert(NewStateInit
->getNumBits() <= sizeof(uint64_t) * 8 &&
330 "State cannot be represented in 64 bits!");
331 for (unsigned I
= 0; I
< NewStateInit
->getNumBits(); ++I
) {
// Only bits that are concrete BitInit values are considered.
332 if (auto *Bit
= dyn_cast
<BitInit
>(NewStateInit
->getBit(I
))) {
334 NewState
|= 1ULL << I
;
// One Action and one entry in Types per action symbol field, in field order.
338 for (StringRef A
: Parent
->getActionSymbolFields()) {
// NOTE(review): SymbolV is dereferenced without a null check; this assumes
// every listed field exists on R - confirm.
339 RecordVal
*SymbolV
= R
->getValue(A
);
// Record-typed symbol: store the referenced def, typed as the record class.
340 if (auto *Ty
= dyn_cast
<RecordRecTy
>(SymbolV
->getType())) {
341 Actions
.emplace_back(R
->getValueAsDef(A
), 0, "");
342 Types
.emplace_back(Ty
->getAsString());
// Integer symbol: stored in the unsigned alternative of Action.
343 } else if (isa
<IntRecTy
>(SymbolV
->getType())) {
344 Actions
.emplace_back(nullptr, R
->getValueAsInt(A
), "");
345 Types
.emplace_back("unsigned");
// String or code symbol: stored as text.
346 } else if (isa
<StringRecTy
>(SymbolV
->getType()) ||
347 isa
<CodeRecTy
>(SymbolV
->getType())) {
348 Actions
.emplace_back(nullptr, 0, R
->getValueAsString(A
));
349 Types
.emplace_back("std::string");
351 report_fatal_error("Unhandled symbol type!");
// A non-empty override replaces the type name that was just appended.
354 StringRef TypeOverride
= Parent
->getActionSymbolType(A
);
355 if (!TypeOverride
.empty())
356 Types
.back() = TypeOverride
;
/// Decide whether this transition may be taken from State, based on whether
/// State already has any of the bits in NewState set.
// NOTE(review): the return statements are missing from this view; going by
// the existing comment, the visible test covers the case where none of this
// transition's bits are set in State yet.
360 bool Transition::canTransitionFrom(uint64_t State
) {
361 if ((State
& NewState
) == 0)
362 // The bits we want to set are not set;
367 uint64_t Transition::transitionFrom(uint64_t State
) {
368 return State
| NewState
;
371 void CustomDfaEmitter::printActionType(raw_ostream
&OS
) { OS
<< TypeName
; }
/// Print the action tuple that the id A stands for, by printing each of its
/// actions in order.
// NOTE(review): the lines that decide when the "std::make_tuple(" wrapper is
// used, the separator between elements and the closing text are missing from
// this view.
373 void CustomDfaEmitter::printActionValue(action_type A
, raw_ostream
&OS
) {
// A is an id into the uniqued action tuples.
374 const ActionTuple
&AT
= Actions
[A
];
376 OS
<< "std::make_tuple(";
378 for (const auto &SingleAction
: AT
) {
382 SingleAction
.print(OS
);
390 void EmitAutomata(RecordKeeper
&RK
, raw_ostream
&OS
) {
391 AutomatonEmitter(RK
).run(OS
);