Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / utils / TableGen / DFAEmitter.cpp
blob54ad81cbebe81d4c054199816adf21a1d0e49f9a
1 //===- DFAEmitter.cpp - Finite state automaton emitter --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class can produce a generic deterministic finite state automaton (DFA),
10 // given a set of possible states and transitions.
12 // The input transitions can be nondeterministic - this class will produce the
13 // deterministic equivalent state machine.
15 // The generated code can run the DFA and produce an accepted / not accepted
16 // state and also produce, given a sequence of transitions that results in an
17 // accepted state, the sequence of intermediate states. This is useful if the
18 // initial automaton was nondeterministic - it allows mapping back from the DFA
19 // to the NFA.
21 //===----------------------------------------------------------------------===//
23 #include "DFAEmitter.h"
24 #include "SequenceToOffsetTable.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/ADT/UniqueVector.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/TableGen/Record.h"
31 #include "llvm/TableGen/TableGenBackend.h"
32 #include <cassert>
33 #include <cstdint>
34 #include <deque>
35 #include <map>
36 #include <set>
37 #include <string>
38 #include <variant>
39 #include <vector>
41 #define DEBUG_TYPE "dfa-emitter"
43 using namespace llvm;
45 //===----------------------------------------------------------------------===//
46 // DfaEmitter implementation. This is independent of the GenAutomaton backend.
47 //===----------------------------------------------------------------------===//
49 void DfaEmitter::addTransition(state_type From, state_type To, action_type A) {
50 Actions.insert(A);
51 NfaStates.insert(From);
52 NfaStates.insert(To);
53 NfaTransitions[{From, A}].push_back(To);
54 ++NumNfaTransitions;
57 void DfaEmitter::visitDfaState(const DfaState &DS) {
58 // For every possible action...
59 auto FromId = DfaStates.idFor(DS);
60 for (action_type A : Actions) {
61 DfaState NewStates;
62 DfaTransitionInfo TI;
63 // For every represented state, word pair in the original NFA...
64 for (state_type FromState : DS) {
65 // If this action is possible from this state add the transitioned-to
66 // states to NewStates.
67 auto I = NfaTransitions.find({FromState, A});
68 if (I == NfaTransitions.end())
69 continue;
70 for (state_type &ToState : I->second) {
71 NewStates.push_back(ToState);
72 TI.emplace_back(FromState, ToState);
75 if (NewStates.empty())
76 continue;
77 // Sort and unique.
78 sort(NewStates);
79 NewStates.erase(std::unique(NewStates.begin(), NewStates.end()),
80 NewStates.end());
81 sort(TI);
82 TI.erase(std::unique(TI.begin(), TI.end()), TI.end());
83 unsigned ToId = DfaStates.insert(NewStates);
84 DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI));
88 void DfaEmitter::constructDfa() {
89 DfaState Initial(1, /*NFA initial state=*/0);
90 DfaStates.insert(Initial);
92 // Note that UniqueVector starts indices at 1, not zero.
93 unsigned DfaStateId = 1;
94 while (DfaStateId <= DfaStates.size()) {
95 DfaState S = DfaStates[DfaStateId];
96 visitDfaState(S);
97 DfaStateId++;
101 void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
102 constructDfa();
104 OS << "// Input NFA has " << NfaStates.size() << " states with "
105 << NumNfaTransitions << " transitions.\n";
106 OS << "// Generated DFA has " << DfaStates.size() << " states with "
107 << DfaTransitions.size() << " transitions.\n\n";
109 // Implementation note: We don't bake a simple std::pair<> here as it requires
110 // significantly more effort to parse. A simple test with a large array of
111 // struct-pairs (N=100000) took clang-10 6s to parse. The same array of
112 // std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to
113 // define the pair type.
115 // FIXME: It may make sense to emit these as ULEB sequences instead of
116 // pairs of uint64_t.
117 OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n";
118 OS << "// transition implies a set of NFA transitions. These are referred\n";
119 OS << "// to by index in " << Name << "Transitions[].\n";
121 SequenceToOffsetTable<DfaTransitionInfo> Table;
122 std::map<DfaTransitionInfo, unsigned> EmittedIndices;
123 for (auto &T : DfaTransitions)
124 Table.add(T.second.second);
125 Table.layout();
126 OS << "const std::array<NfaStatePair, " << Table.size() << "> " << Name
127 << "TransitionInfo = {{\n";
128 Table.emit(
130 [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
131 OS << "{" << P.first << ", " << P.second << "}";
133 "{0ULL, 0ULL}");
135 OS << "}};\n\n";
137 OS << "// A transition in the generated " << Name << " DFA.\n";
138 OS << "struct " << Name << "Transition {\n";
139 OS << " unsigned FromDfaState; // The transitioned-from DFA state.\n";
140 OS << " ";
141 printActionType(OS);
142 OS << " Action; // The input symbol that causes this transition.\n";
143 OS << " unsigned ToDfaState; // The transitioned-to DFA state.\n";
144 OS << " unsigned InfoIdx; // Start index into " << Name
145 << "TransitionInfo.\n";
146 OS << "};\n\n";
148 OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";
149 OS << "// The initial state is 1, not zero.\n";
150 OS << "const std::array<" << Name << "Transition, "
151 << DfaTransitions.size() << "> " << Name << "Transitions = {{\n";
152 for (auto &KV : DfaTransitions) {
153 dfa_state_type From = KV.first.first;
154 dfa_state_type To = KV.second.first;
155 action_type A = KV.first.second;
156 unsigned InfoIdx = Table.get(KV.second.second);
157 OS << " {" << From << ", ";
158 printActionValue(A, OS);
159 OS << ", " << To << ", " << InfoIdx << "},\n";
161 OS << "\n}};\n\n";
164 void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; }
166 void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; }
168 //===----------------------------------------------------------------------===//
169 // AutomatonEmitter implementation
170 //===----------------------------------------------------------------------===//
172 namespace {
174 using Action = std::variant<Record *, unsigned, std::string>;
175 using ActionTuple = std::vector<Action>;
176 class Automaton;
178 class Transition {
179 uint64_t NewState;
180 // The tuple of actions that causes this transition.
181 ActionTuple Actions;
182 // The types of the actions; this is the same across all transitions.
183 SmallVector<std::string, 4> Types;
185 public:
186 Transition(Record *R, Automaton *Parent);
187 const ActionTuple &getActions() { return Actions; }
188 SmallVector<std::string, 4> getTypes() { return Types; }
190 bool canTransitionFrom(uint64_t State);
191 uint64_t transitionFrom(uint64_t State);
194 class Automaton {
195 RecordKeeper &Records;
196 Record *R;
197 std::vector<Transition> Transitions;
198 /// All possible action tuples, uniqued.
199 UniqueVector<ActionTuple> Actions;
200 /// The fields within each Transition object to find the action symbols.
201 std::vector<StringRef> ActionSymbolFields;
203 public:
204 Automaton(RecordKeeper &Records, Record *R);
205 void emit(raw_ostream &OS);
207 ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; }
208 /// If the type of action A has been overridden (there exists a field
209 /// "TypeOf_A") return that, otherwise return the empty string.
210 StringRef getActionSymbolType(StringRef A);
213 class AutomatonEmitter {
214 RecordKeeper &Records;
216 public:
217 AutomatonEmitter(RecordKeeper &R) : Records(R) {}
218 void run(raw_ostream &OS);
221 /// A DfaEmitter implementation that can print our variant action type.
222 class CustomDfaEmitter : public DfaEmitter {
223 const UniqueVector<ActionTuple> &Actions;
224 std::string TypeName;
226 public:
227 CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName)
228 : Actions(Actions), TypeName(TypeName) {}
230 void printActionType(raw_ostream &OS) override;
231 void printActionValue(action_type A, raw_ostream &OS) override;
233 } // namespace
235 void AutomatonEmitter::run(raw_ostream &OS) {
236 for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) {
237 Automaton A(Records, R);
238 OS << "#ifdef GET_" << R->getName() << "_DECL\n";
239 A.emit(OS);
240 OS << "#endif // GET_" << R->getName() << "_DECL\n";
244 Automaton::Automaton(RecordKeeper &Records, Record *R)
245 : Records(Records), R(R) {
246 LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n");
247 ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields");
250 void Automaton::emit(raw_ostream &OS) {
251 StringRef TransitionClass = R->getValueAsString("TransitionClass");
252 for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) {
253 assert(T->isSubClassOf("Transition"));
254 Transitions.emplace_back(T, this);
255 Actions.insert(Transitions.back().getActions());
258 LLVM_DEBUG(dbgs() << " Action alphabet cardinality: " << Actions.size()
259 << "\n");
260 LLVM_DEBUG(dbgs() << " Each state has " << Transitions.size()
261 << " potential transitions.\n");
263 StringRef Name = R->getName();
265 CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action");
266 // Starting from the initial state, build up a list of possible states and
267 // transitions.
268 std::deque<uint64_t> Worklist(1, 0);
269 std::set<uint64_t> SeenStates;
270 unsigned NumTransitions = 0;
271 SeenStates.insert(Worklist.front());
272 while (!Worklist.empty()) {
273 uint64_t State = Worklist.front();
274 Worklist.pop_front();
275 for (Transition &T : Transitions) {
276 if (!T.canTransitionFrom(State))
277 continue;
278 uint64_t NewState = T.transitionFrom(State);
279 if (SeenStates.emplace(NewState).second)
280 Worklist.emplace_back(NewState);
281 ++NumTransitions;
282 Emitter.addTransition(State, NewState, Actions.idFor(T.getActions()));
285 LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size()
286 << " states with " << NumTransitions << " transitions.\n");
287 (void) NumTransitions;
289 const auto &ActionTypes = Transitions.back().getTypes();
290 OS << "// The type of an action in the " << Name << " automaton.\n";
291 if (ActionTypes.size() == 1) {
292 OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n";
293 } else {
294 OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ")
295 << ">;\n";
297 OS << "\n";
299 Emitter.emit(Name, OS);
302 StringRef Automaton::getActionSymbolType(StringRef A) {
303 Twine Ty = "TypeOf_" + A;
304 if (!R->getValue(Ty.str()))
305 return "";
306 return R->getValueAsString(Ty.str());
309 Transition::Transition(Record *R, Automaton *Parent) {
310 BitsInit *NewStateInit = R->getValueAsBitsInit("NewState");
311 NewState = 0;
312 assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 &&
313 "State cannot be represented in 64 bits!");
314 for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) {
315 if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) {
316 if (Bit->getValue())
317 NewState |= 1ULL << I;
321 for (StringRef A : Parent->getActionSymbolFields()) {
322 RecordVal *SymbolV = R->getValue(A);
323 if (auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) {
324 Actions.emplace_back(R->getValueAsDef(A));
325 Types.emplace_back(Ty->getAsString());
326 } else if (isa<IntRecTy>(SymbolV->getType())) {
327 Actions.emplace_back(static_cast<unsigned>(R->getValueAsInt(A)));
328 Types.emplace_back("unsigned");
329 } else if (isa<StringRecTy>(SymbolV->getType())) {
330 Actions.emplace_back(std::string(R->getValueAsString(A)));
331 Types.emplace_back("std::string");
332 } else {
333 report_fatal_error("Unhandled symbol type!");
336 StringRef TypeOverride = Parent->getActionSymbolType(A);
337 if (!TypeOverride.empty())
338 Types.back() = std::string(TypeOverride);
342 bool Transition::canTransitionFrom(uint64_t State) {
343 if ((State & NewState) == 0)
344 // The bits we want to set are not set;
345 return true;
346 return false;
349 uint64_t Transition::transitionFrom(uint64_t State) {
350 return State | NewState;
353 void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; }
355 void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {
356 const ActionTuple &AT = Actions[A];
357 if (AT.size() > 1)
358 OS << "std::make_tuple(";
359 ListSeparator LS;
360 for (const auto &SingleAction : AT) {
361 OS << LS;
362 if (const auto *R = std::get_if<Record *>(&SingleAction))
363 OS << (*R)->getName();
364 else if (const auto *S = std::get_if<std::string>(&SingleAction))
365 OS << '"' << *S << '"';
366 else
367 OS << std::get<unsigned>(SingleAction);
369 if (AT.size() > 1)
370 OS << ")";
// File-local static object that associates AutomatonEmitter with the
// "gen-automata" option name and its description string.
373 static TableGen::Emitter::OptClass<AutomatonEmitter>
374 X("gen-automata", "Generate generic automata");