[clangd] Fix warnings
[llvm-project.git] / llvm / utils / TableGen / DecoderEmitter.cpp
blob90a6d0ee8acb57346cfd7371654305b56ef7849c
1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // It contains the tablegen backend that emits the decoder functions for
10 // targets with fixed/variable length instruction set.
12 //===----------------------------------------------------------------------===//
14 #include "Common/CodeGenHwModes.h"
15 #include "Common/CodeGenInstruction.h"
16 #include "Common/CodeGenTarget.h"
17 #include "Common/InfoByHwMode.h"
18 #include "Common/VarLenCodeEmitterGen.h"
19 #include "TableGenBackends.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/CachedHashString.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallBitVector.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/MC/MCDecoderOps.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/FormattedStream.h"
36 #include "llvm/Support/LEB128.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/TableGen/Error.h"
39 #include "llvm/TableGen/Record.h"
40 #include <algorithm>
41 #include <cassert>
42 #include <cstddef>
43 #include <cstdint>
44 #include <map>
45 #include <memory>
46 #include <set>
47 #include <string>
48 #include <utility>
49 #include <vector>
51 using namespace llvm;
53 #define DEBUG_TYPE "decoder-emitter"
55 extern cl::OptionCategory DisassemblerEmitterCat;
/// Values accepted by the -suppress-per-hwmode-duplicates option.
enum SuppressLevel {
  SUPPRESSION_DISABLE = 0, ///< Selected by "O0".
  SUPPRESSION_LEVEL1 = 1,  ///< Selected by "O1".
  SUPPRESSION_LEVEL2 = 2   ///< Selected by "O2".
};
63 cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
64 "suppress-per-hwmode-duplicates",
65 cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"),
66 cl::values(
67 clEnumValN(
68 SUPPRESSION_DISABLE, "O0",
69 "Do not prevent DecoderTable duplications caused by HwModes"),
70 clEnumValN(
71 SUPPRESSION_LEVEL1, "O1",
72 "Remove duplicate DecoderTable entries generated due to HwModes"),
73 clEnumValN(
74 SUPPRESSION_LEVEL2, "O2",
75 "Extract HwModes-specific instructions into new DecoderTables, "
76 "significantly reducing Table Duplications")),
77 cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
79 namespace {
81 STATISTIC(NumEncodings, "Number of encodings considered");
82 STATISTIC(NumEncodingsLackingDisasm,
83 "Number of encodings without disassembler info");
84 STATISTIC(NumInstructions, "Number of instructions considered");
85 STATISTIC(NumEncodingsSupported, "Number of encodings supported");
86 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
/// One (Base, Width, Offset) triple describing a piece of an operand's
/// encoding.
// NOTE(review): presumably Base is the start bit in the instruction, Width the
// number of bits and Offset the bit position inside the operand - confirm
// against the users of this struct.
struct EncodingField {
  unsigned Base;
  unsigned Width;
  unsigned Offset;

  EncodingField(unsigned B, unsigned W, unsigned O)
      : Base(B), Width(W), Offset(O) {}
};
94 struct OperandInfo {
95 std::vector<EncodingField> Fields;
96 std::string Decoder;
97 bool HasCompleteDecoder;
98 uint64_t InitValue;
100 OperandInfo(std::string D, bool HCD)
101 : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {}
103 void addField(unsigned Base, unsigned Width, unsigned Offset) {
104 Fields.push_back(EncodingField(Base, Width, Offset));
107 unsigned numFields() const { return Fields.size(); }
109 typedef std::vector<EncodingField>::const_iterator const_iterator;
111 const_iterator begin() const { return Fields.begin(); }
112 const_iterator end() const { return Fields.end(); }
115 typedef std::vector<uint8_t> DecoderTable;
116 typedef uint32_t DecoderFixup;
117 typedef std::vector<DecoderFixup> FixupList;
118 typedef std::vector<FixupList> FixupScopeList;
119 typedef SmallSetVector<CachedHashString, 16> PredicateSet;
120 typedef SmallSetVector<CachedHashString, 16> DecoderSet;
121 struct DecoderTableInfo {
122 DecoderTable Table;
123 FixupScopeList FixupStack;
124 PredicateSet Predicates;
125 DecoderSet Decoders;
128 struct EncodingAndInst {
129 const Record *EncodingDef;
130 const CodeGenInstruction *Inst;
131 StringRef HwModeName;
133 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
134 StringRef HwModeName = "")
135 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {}
/// An (EncodingID, Opcode) pair; both members are zero when default
/// constructed.
struct EncodingIDAndOpcode {
  unsigned EncodingID = 0;
  unsigned Opcode = 0;

  EncodingIDAndOpcode() = default;
  EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
      : EncodingID(EncodingID), Opcode(Opcode) {}
};
147 using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
148 using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
150 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
151 if (Value.EncodingDef != Value.Inst->TheDef)
152 OS << Value.EncodingDef->getName() << ":";
153 OS << Value.Inst->TheDef->getName();
154 return OS;
157 class DecoderEmitter {
158 const RecordKeeper &RK;
159 std::vector<EncodingAndInst> NumberedEncodings;
161 public:
162 DecoderEmitter(const RecordKeeper &R, StringRef PredicateNamespace)
163 : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {}
165 // Emit the decoder state machine table.
166 void emitTable(formatted_raw_ostream &OS, DecoderTable &Table, indent Indent,
167 unsigned BitWidth, StringRef Namespace,
168 const EncodingIDsVec &EncodingIDs) const;
169 void emitInstrLenTable(formatted_raw_ostream &OS,
170 std::vector<unsigned> &InstrLen) const;
171 void emitPredicateFunction(formatted_raw_ostream &OS,
172 PredicateSet &Predicates, indent Indent) const;
173 void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
174 indent Indent) const;
176 // run - Output the code emitter
177 void run(raw_ostream &o);
179 private:
180 CodeGenTarget Target;
182 public:
183 StringRef PredicateNamespace;
186 } // end anonymous namespace
// BIT_TRUE, BIT_FALSE and BIT_UNSET form a ternary logic system for a single
// bit value.
//
// BIT_UNFILTERED is the initial value of a filter position and is only used
// during filter processing.
enum bit_value_t {
  BIT_TRUE,      // '1'
  BIT_FALSE,     // '0'
  BIT_UNSET,     // '?'
  BIT_UNFILTERED // unfiltered
};
200 static bool ValueSet(bit_value_t V) {
201 return (V == BIT_TRUE || V == BIT_FALSE);
204 static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); }
206 static int Value(bit_value_t V) {
207 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
210 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
211 if (const BitInit *bit = dyn_cast<BitInit>(bits.getBit(index)))
212 return bit->getValue() ? BIT_TRUE : BIT_FALSE;
214 // The bit is uninitialized.
215 return BIT_UNSET;
218 // Prints the bit value for each position.
219 static void dumpBits(raw_ostream &OS, const BitsInit &bits) {
220 for (unsigned index = bits.getNumBits(); index > 0; --index) {
221 switch (bitFromBits(bits, index - 1)) {
222 case BIT_TRUE:
223 OS << "1";
224 break;
225 case BIT_FALSE:
226 OS << "0";
227 break;
228 case BIT_UNSET:
229 OS << "_";
230 break;
231 default:
232 llvm_unreachable("unexpected return value from bitFromBits");
237 static const BitsInit &getBitsField(const Record &def, StringRef str) {
238 const RecordVal *RV = def.getValue(str);
239 if (const BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue()))
240 return *Bits;
242 // variable length instruction
243 VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV);
244 SmallVector<const Init *, 16> Bits;
246 for (const auto &SI : VLI) {
247 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) {
248 for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) {
249 Bits.push_back(BI->getBit(Idx));
251 } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) {
252 Bits.push_back(const_cast<BitInit *>(BI));
253 } else {
254 for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx)
255 Bits.push_back(UnsetInit::get(def.getRecords()));
259 return *BitsInit::get(def.getRecords(), Bits);
262 // Representation of the instruction to work on.
263 typedef std::vector<bit_value_t> insn_t;
265 namespace {
// Key under which Filter stores the chooser for instructions whose segment
// value is not constant: all 64 bits set.
static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = ~static_cast<uint64_t>(0);
269 class FilterChooser;
271 /// Filter - Filter works with FilterChooser to produce the decoding tree for
272 /// the ISA.
274 /// It is useful to think of a Filter as governing the switch stmts of the
275 /// decoding tree in a certain level. Each case stmt delegates to an inferior
276 /// FilterChooser to decide what further decoding logic to employ, or in other
277 /// words, what other remaining bits to look at. The FilterChooser eventually
278 /// chooses a best Filter to do its job.
280 /// This recursive scheme ends when the number of Opcodes assigned to the
281 /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
282 /// the Filter/FilterChooser combo does not know how to distinguish among the
283 /// Opcodes assigned.
285 /// An example of a conflict is
287 /// Conflict:
288 /// 111101000.00........00010000....
289 /// 111101000.00........0001........
290 /// 1111010...00........0001........
291 /// 1111010...00....................
292 /// 1111010.........................
293 /// 1111............................
294 /// ................................
295 /// VST4q8a 111101000_00________00010000____
296 /// VST4q8b 111101000_00________00010000____
298 /// The Debug output shows the path that the decoding tree follows to reach
299 /// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
300 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
302 /// The encoding info in the .td files does not specify this meta information,
303 /// which could have been used by the decoder to resolve the conflict. The
304 /// decoder could try to decode the even/odd register numbering and assign to
305 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
306 /// version and return the Opcode since the two have the same Asm format string.
307 class Filter {
308 protected:
309 const FilterChooser
310 *Owner; // points to the FilterChooser who owns this filter
311 unsigned StartBit; // the starting bit position
312 unsigned NumBits; // number of bits to filter
313 bool Mixed; // a mixed region contains both set and unset bits
315 // Map of well-known segment value to the set of uid's with that value.
316 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions;
318 // Set of uid's with non-constant segment values.
319 std::vector<EncodingIDAndOpcode> VariableInstructions;
321 // Map of well-known segment value to its delegate.
322 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
324 // Number of instructions which fall under FilteredInstructions category.
325 unsigned NumFiltered;
327 // Keeps track of the last opcode in the filtered bucket.
328 EncodingIDAndOpcode LastOpcFiltered;
330 public:
331 Filter(Filter &&f);
332 Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);
334 ~Filter() = default;
336 unsigned getNumFiltered() const { return NumFiltered; }
338 EncodingIDAndOpcode getSingletonOpc() const {
339 assert(NumFiltered == 1);
340 return LastOpcFiltered;
343 // Return the filter chooser for the group of instructions without constant
344 // segment values.
345 const FilterChooser &getVariableFC() const {
346 assert(NumFiltered == 1);
347 assert(FilterChooserMap.size() == 1);
348 return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second);
351 // Divides the decoding task into sub tasks and delegates them to the
352 // inferior FilterChooser's.
354 // A special case arises when there's only one entry in the filtered
355 // instructions. In order to unambiguously decode the singleton, we need to
356 // match the remaining undecoded encoding bits against the singleton.
357 void recurse();
359 // Emit table entries to decode instructions given a segment or segments of
360 // bits.
361 void emitTableEntry(DecoderTableInfo &TableInfo) const;
363 // Returns the number of fanout produced by the filter. More fanout implies
364 // the filter distinguishes more categories of instructions.
365 unsigned usefulness() const;
366 }; // end class Filter
368 } // end anonymous namespace
// States of the finite state machine run by FilterChooser::filterProcessor()
// while it produces the candidate filters.
enum bitAttr_t {
  ATTR_NONE,
  ATTR_FILTERED,
  ATTR_ALL_SET,
  ATTR_ALL_UNSET,
  ATTR_MIXED
};
380 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters
381 /// in order to perform the decoding of instructions at the current level.
383 /// Decoding proceeds from the top down. Based on the well-known encoding bits
384 /// of instructions available, FilterChooser builds up the possible Filters that
385 /// can further the task of decoding by distinguishing among the remaining
386 /// candidate instructions.
388 /// Once a filter has been chosen, it is called upon to divide the decoding task
389 /// into sub-tasks and delegates them to its inferior FilterChoosers for further
390 /// processings.
392 /// It is useful to think of a Filter as governing the switch stmts of the
393 /// decoding tree. And each case is delegated to an inferior FilterChooser to
394 /// decide what further remaining bits to look at.
395 namespace {
397 class FilterChooser {
398 protected:
399 friend class Filter;
401 // Vector of codegen instructions to choose our filter.
402 ArrayRef<EncodingAndInst> AllInstructions;
404 // Vector of uid's for this filter chooser to work on.
405 // The first member of the pair is the opcode id being decoded, the second is
406 // the opcode id that should be emitted.
407 const std::vector<EncodingIDAndOpcode> &Opcodes;
409 // Lookup table for the operand decoding of instructions.
410 const std::map<unsigned, std::vector<OperandInfo>> &Operands;
412 // Vector of candidate filters.
413 std::vector<Filter> Filters;
415 // Array of bit values passed down from our parent.
416 // Set to all BIT_UNFILTERED's for Parent == NULL.
417 std::vector<bit_value_t> FilterBitValues;
419 // Links to the FilterChooser above us in the decoding tree.
420 const FilterChooser *Parent;
422 // Index of the best filter from Filters.
423 int BestIndex;
425 // Width of instructions
426 unsigned BitWidth;
428 // Parent emitter
429 const DecoderEmitter *Emitter;
431 public:
432 FilterChooser(ArrayRef<EncodingAndInst> Insts,
433 const std::vector<EncodingIDAndOpcode> &IDs,
434 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
435 unsigned BW, const DecoderEmitter *E)
436 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
437 FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
438 BitWidth(BW), Emitter(E) {
439 doFilter();
442 FilterChooser(ArrayRef<EncodingAndInst> Insts,
443 const std::vector<EncodingIDAndOpcode> &IDs,
444 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
445 const std::vector<bit_value_t> &ParentFilterBitValues,
446 const FilterChooser &parent)
447 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
448 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
449 BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
450 doFilter();
453 FilterChooser(const FilterChooser &) = delete;
454 void operator=(const FilterChooser &) = delete;
456 unsigned getBitWidth() const { return BitWidth; }
458 protected:
459 // Populates the insn given the uid.
460 void insnWithID(insn_t &Insn, unsigned Opcode) const {
461 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
462 const BitsInit &Bits = getBitsField(*EncodingDef, "Inst");
463 Insn.resize(std::max(BitWidth, Bits.getNumBits()), BIT_UNSET);
464 // We may have a SoftFail bitmask, which specifies a mask where an encoding
465 // may differ from the value in "Inst" and yet still be valid, but the
466 // disassembler should return SoftFail instead of Success.
468 // This is used for marking UNPREDICTABLE instructions in the ARM world.
469 const RecordVal *RV = EncodingDef->getValue("SoftFail");
470 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
471 for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
472 if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
473 Insn[i] = BIT_UNSET;
474 else
475 Insn[i] = bitFromBits(Bits, i);
479 // Emit the name of the encoding/instruction pair.
480 void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
481 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
482 const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
483 if (EncodingDef != InstDef)
484 OS << EncodingDef->getName() << ":";
485 OS << InstDef->getName();
488 // Populates the field of the insn given the start position and the number of
489 // consecutive bits to scan for.
491 // Returns a pair of values (indicator, field), where the indicator is false
492 // if there exists any uninitialized bit value in the range and true if all
493 // bits are well-known. The second value is the potentially populated field.
494 std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit,
495 unsigned NumBits) const;
497 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
498 /// filter array as a series of chars.
499 void dumpFilterArray(raw_ostream &OS,
500 const std::vector<bit_value_t> &filter) const;
502 /// dumpStack - dumpStack traverses the filter chooser chain and calls
503 /// dumpFilterArray on each filter chooser up to the top level one.
504 void dumpStack(raw_ostream &OS, const char *prefix) const;
506 Filter &bestFilter() {
507 assert(BestIndex != -1 && "BestIndex not set");
508 return Filters[BestIndex];
511 bool PositionFiltered(unsigned i) const {
512 return ValueSet(FilterBitValues[i]);
515 // Calculates the island(s) needed to decode the instruction.
516 // This returns a lit of undecoded bits of an instructions, for example,
517 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
518 // decoded bits in order to verify that the instruction matches the Opcode.
519 unsigned getIslands(std::vector<unsigned> &StartBits,
520 std::vector<unsigned> &EndBits,
521 std::vector<uint64_t> &FieldVals,
522 const insn_t &Insn) const;
524 // Emits code to check the Predicates member of an instruction are true.
525 // Returns true if predicate matches were emitted, false otherwise.
526 bool emitPredicateMatch(raw_ostream &OS, unsigned Opc) const;
527 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
528 raw_ostream &OS) const;
530 bool doesOpcodeNeedPredicate(unsigned Opc) const;
531 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
532 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
534 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
536 // Emits table entries to decode the singleton.
537 void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
538 EncodingIDAndOpcode Opc) const;
540 // Emits code to decode the singleton, and then to decode the rest.
541 void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
542 const Filter &Best) const;
544 void emitBinaryParser(raw_ostream &OS, indent Indent,
545 const OperandInfo &OpInfo,
546 bool &OpHasCompleteDecoder) const;
548 void emitDecoder(raw_ostream &OS, indent Indent, unsigned Opc,
549 bool &HasCompleteDecoder) const;
550 unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
551 bool &HasCompleteDecoder) const;
553 // Assign a single filter and run with it.
554 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
556 // reportRegion is a helper function for filterProcessor to mark a region as
557 // eligible for use as a filter region.
558 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
559 bool AllowMixed);
561 // FilterProcessor scans the well-known encoding bits of the instructions and
562 // builds up a list of candidate filters. It chooses the best filter and
563 // recursively descends down the decoding tree.
564 bool filterProcessor(bool AllowMixed, bool Greedy = true);
566 // Decides on the best configuration of filter(s) to use in order to decode
567 // the instructions. A conflict of instructions may occur, in which case we
568 // dump the conflict set to the standard error.
569 void doFilter();
571 public:
572 // emitTableEntries - Emit state machine entries to decode our share of
573 // instructions.
574 void emitTableEntries(DecoderTableInfo &TableInfo) const;
577 } // end anonymous namespace
579 ///////////////////////////
580 // //
581 // Filter Implementation //
582 // //
583 ///////////////////////////
585 Filter::Filter(Filter &&f)
586 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
587 FilteredInstructions(std::move(f.FilteredInstructions)),
588 VariableInstructions(std::move(f.VariableInstructions)),
589 FilterChooserMap(std::move(f.FilterChooserMap)),
590 NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {}
592 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
593 bool mixed)
594 : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
595 assert(StartBit + NumBits - 1 < Owner->BitWidth);
597 NumFiltered = 0;
598 LastOpcFiltered = {0, 0};
600 for (const auto &OpcPair : Owner->Opcodes) {
601 insn_t Insn;
603 // Populates the insn given the uid.
604 Owner->insnWithID(Insn, OpcPair.EncodingID);
606 // Scans the segment for possibly well-specified encoding bits.
607 auto [Ok, Field] = Owner->fieldFromInsn(Insn, StartBit, NumBits);
609 if (Ok) {
610 // The encoding bits are well-known. Lets add the uid of the
611 // instruction into the bucket keyed off the constant field value.
612 LastOpcFiltered = OpcPair;
613 FilteredInstructions[Field].push_back(LastOpcFiltered);
614 ++NumFiltered;
615 } else {
616 // Some of the encoding bit(s) are unspecified. This contributes to
617 // one additional member of "Variable" instructions.
618 VariableInstructions.push_back(OpcPair);
622 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) &&
623 "Filter returns no instruction categories");
626 // Divides the decoding task into sub tasks and delegates them to the
627 // inferior FilterChooser's.
629 // A special case arises when there's only one entry in the filtered
630 // instructions. In order to unambiguously decode the singleton, we need to
631 // match the remaining undecoded encoding bits against the singleton.
632 void Filter::recurse() {
633 // Starts by inheriting our parent filter chooser's filter bit values.
634 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues);
636 if (!VariableInstructions.empty()) {
637 // Conservatively marks each segment position as BIT_UNSET.
638 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
639 BitValueArray[StartBit + bitIndex] = BIT_UNSET;
641 // Delegates to an inferior filter chooser for further processing on this
642 // group of instructions whose segment values are variable.
643 FilterChooserMap.insert(std::pair(
644 NO_FIXED_SEGMENTS_SENTINEL,
645 std::make_unique<FilterChooser>(Owner->AllInstructions,
646 VariableInstructions, Owner->Operands,
647 BitValueArray, *Owner)));
650 // No need to recurse for a singleton filtered instruction.
651 // See also Filter::emit*().
652 if (getNumFiltered() == 1) {
653 assert(FilterChooserMap.size() == 1);
654 return;
657 // Otherwise, create sub choosers.
658 for (const auto &Inst : FilteredInstructions) {
660 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
661 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) {
662 if (Inst.first & (1ULL << bitIndex))
663 BitValueArray[StartBit + bitIndex] = BIT_TRUE;
664 else
665 BitValueArray[StartBit + bitIndex] = BIT_FALSE;
668 // Delegates to an inferior filter chooser for further processing on this
669 // category of instructions.
670 FilterChooserMap.insert(
671 std::pair(Inst.first, std::make_unique<FilterChooser>(
672 Owner->AllInstructions, Inst.second,
673 Owner->Operands, BitValueArray, *Owner)));
677 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
678 uint32_t DestIdx) {
679 // Any NumToSkip fixups in the current scope can resolve to the
680 // current location.
681 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend();
682 I != E; ++I) {
683 // Calculate the distance from the byte following the fixup entry byte
684 // to the destination. The Target is calculated from after the 16-bit
685 // NumToSkip entry itself, so subtract two from the displacement here
686 // to account for that.
687 uint32_t FixupIdx = *I;
688 uint32_t Delta = DestIdx - FixupIdx - 3;
689 // Our NumToSkip entries are 24-bits. Make sure our table isn't too
690 // big.
691 assert(Delta < (1u << 24));
692 Table[FixupIdx] = (uint8_t)Delta;
693 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
694 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16);
698 // Emit table entries to decode instructions given a segment or segments
699 // of bits.
700 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
701 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
702 TableInfo.Table.push_back(MCD::OPC_ExtractField);
704 SmallString<16> SBytes;
705 raw_svector_ostream S(SBytes);
706 encodeULEB128(StartBit, S);
707 TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end());
708 TableInfo.Table.push_back(NumBits);
710 // A new filter entry begins a new scope for fixup resolution.
711 TableInfo.FixupStack.emplace_back();
713 DecoderTable &Table = TableInfo.Table;
715 size_t PrevFilter = 0;
716 bool HasFallthrough = false;
717 for (const auto &Filter : FilterChooserMap) {
718 // Field value -1 implies a non-empty set of variable instructions.
719 // See also recurse().
720 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) {
721 HasFallthrough = true;
723 // Each scope should always have at least one filter value to check
724 // for.
725 assert(PrevFilter != 0 && "empty filter set!");
726 FixupList &CurScope = TableInfo.FixupStack.back();
727 // Resolve any NumToSkip fixups in the current scope.
728 resolveTableFixups(Table, CurScope, Table.size());
729 CurScope.clear();
730 PrevFilter = 0; // Don't re-process the filter's fallthrough.
731 } else {
732 Table.push_back(MCD::OPC_FilterValue);
733 // Encode and emit the value to filter against.
734 uint8_t Buffer[16];
735 unsigned Len = encodeULEB128(Filter.first, Buffer);
736 Table.insert(Table.end(), Buffer, Buffer + Len);
737 // Reserve space for the NumToSkip entry. We'll backpatch the value
738 // later.
739 PrevFilter = Table.size();
740 Table.push_back(0);
741 Table.push_back(0);
742 Table.push_back(0);
745 // We arrive at a category of instructions with the same segment value.
746 // Now delegate to the sub filter chooser for further decodings.
747 // The case may fallthrough, which happens if the remaining well-known
748 // encoding bits do not match exactly.
749 Filter.second->emitTableEntries(TableInfo);
751 // Now that we've emitted the body of the handler, update the NumToSkip
752 // of the filter itself to be able to skip forward when false. Subtract
753 // two as to account for the width of the NumToSkip field itself.
754 if (PrevFilter) {
755 uint32_t NumToSkip = Table.size() - PrevFilter - 3;
756 assert(NumToSkip < (1u << 24) &&
757 "disassembler decoding table too large!");
758 Table[PrevFilter] = (uint8_t)NumToSkip;
759 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8);
760 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16);
764 // Any remaining unresolved fixups bubble up to the parent fixup scope.
765 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!");
766 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1;
767 FixupScopeList::iterator Dest = Source - 1;
768 llvm::append_range(*Dest, *Source);
769 TableInfo.FixupStack.pop_back();
771 // If there is no fallthrough, then the final filter should get fixed
772 // up according to the enclosing scope rather than the current position.
773 if (!HasFallthrough)
774 TableInfo.FixupStack.back().push_back(PrevFilter);
777 // Returns the number of fanout produced by the filter. More fanout implies
778 // the filter distinguishes more categories of instructions.
779 unsigned Filter::usefulness() const {
780 if (!VariableInstructions.empty())
781 return FilteredInstructions.size();
782 else
783 return FilteredInstructions.size() + 1;
786 //////////////////////////////////
787 // //
788 // Filterchooser Implementation //
789 // //
790 //////////////////////////////////
792 // Emit the decoder state machine table.
793 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
794 indent Indent, unsigned BitWidth,
795 StringRef Namespace,
796 const EncodingIDsVec &EncodingIDs) const {
797 // We'll need to be able to map from a decoded opcode into the corresponding
798 // EncodingID for this specific combination of BitWidth and Namespace. This
799 // is used below to index into NumberedEncodings.
800 DenseMap<unsigned, unsigned> OpcodeToEncodingID;
801 OpcodeToEncodingID.reserve(EncodingIDs.size());
802 for (const auto &EI : EncodingIDs)
803 OpcodeToEncodingID[EI.Opcode] = EI.EncodingID;
805 OS << Indent << "static const uint8_t DecoderTable" << Namespace << BitWidth
806 << "[] = {\n";
808 Indent += 2;
810 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted.
811 auto emitULEB128 = [](DecoderTable::const_iterator I,
812 formatted_raw_ostream &OS) {
813 unsigned Len = 0;
814 while (*I >= 128) {
815 OS << (unsigned)*I++ << ", ";
816 Len++;
818 OS << (unsigned)*I++ << ", ";
819 return Len + 1;
822 // Emit 24-bit numtoskip value to OS, returning the NumToSkip value.
823 auto emitNumToSkip = [](DecoderTable::const_iterator I,
824 formatted_raw_ostream &OS) {
825 uint8_t Byte = *I++;
826 uint32_t NumToSkip = Byte;
827 OS << (unsigned)Byte << ", ";
828 Byte = *I++;
829 OS << (unsigned)Byte << ", ";
830 NumToSkip |= Byte << 8;
831 Byte = *I++;
832 OS << utostr(Byte) << ", ";
833 NumToSkip |= Byte << 16;
834 return NumToSkip;
837 // FIXME: We may be able to use the NumToSkip values to recover
838 // appropriate indentation levels.
839 DecoderTable::const_iterator I = Table.begin();
840 DecoderTable::const_iterator E = Table.end();
841 while (I != E) {
842 assert(I < E && "incomplete decode table entry!");
844 uint64_t Pos = I - Table.begin();
845 OS << "/* " << Pos << " */";
846 OS.PadToColumn(12);
848 switch (*I) {
849 default:
850 PrintFatalError("invalid decode table opcode");
851 case MCD::OPC_ExtractField: {
852 ++I;
853 OS << Indent << "MCD::OPC_ExtractField, ";
855 // ULEB128 encoded start value.
856 const char *ErrMsg = nullptr;
857 unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr,
858 Table.data() + Table.size(), &ErrMsg);
859 assert(ErrMsg == nullptr && "ULEB128 value too large!");
860 I += emitULEB128(I, OS);
862 unsigned Len = *I++;
863 OS << Len << ", // Inst{";
864 if (Len > 1)
865 OS << (Start + Len - 1) << "-";
866 OS << Start << "} ...\n";
867 break;
869 case MCD::OPC_FilterValue: {
870 ++I;
871 OS << Indent << "MCD::OPC_FilterValue, ";
872 // The filter value is ULEB128 encoded.
873 I += emitULEB128(I, OS);
875 // 24-bit numtoskip value.
876 uint32_t NumToSkip = emitNumToSkip(I, OS);
877 I += 3;
878 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
879 break;
881 case MCD::OPC_CheckField: {
882 ++I;
883 OS << Indent << "MCD::OPC_CheckField, ";
884 // ULEB128 encoded start value.
885 I += emitULEB128(I, OS);
886 // 8-bit length.
887 unsigned Len = *I++;
888 OS << Len << ", ";
889 // ULEB128 encoded field value.
890 I += emitULEB128(I, OS);
892 // 24-bit numtoskip value.
893 uint32_t NumToSkip = emitNumToSkip(I, OS);
894 I += 3;
895 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
896 break;
898 case MCD::OPC_CheckPredicate: {
899 ++I;
900 OS << Indent << "MCD::OPC_CheckPredicate, ";
901 I += emitULEB128(I, OS);
903 // 24-bit numtoskip value.
904 uint32_t NumToSkip = emitNumToSkip(I, OS);
905 I += 3;
906 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
907 break;
909 case MCD::OPC_Decode:
910 case MCD::OPC_TryDecode: {
911 bool IsTry = *I == MCD::OPC_TryDecode;
912 ++I;
913 // Decode the Opcode value.
914 const char *ErrMsg = nullptr;
915 unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr,
916 Table.data() + Table.size(), &ErrMsg);
917 assert(ErrMsg == nullptr && "ULEB128 value too large!");
919 OS << Indent << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, ";
920 I += emitULEB128(I, OS);
922 // Decoder index.
923 I += emitULEB128(I, OS);
925 auto EncI = OpcodeToEncodingID.find(Opc);
926 assert(EncI != OpcodeToEncodingID.end() && "no encoding entry");
927 auto EncodingID = EncI->second;
929 if (!IsTry) {
930 OS << "// Opcode: " << NumberedEncodings[EncodingID] << "\n";
931 break;
934 // Fallthrough for OPC_TryDecode.
936 // 24-bit numtoskip value.
937 uint32_t NumToSkip = emitNumToSkip(I, OS);
938 I += 3;
940 OS << "// Opcode: " << NumberedEncodings[EncodingID]
941 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
942 break;
944 case MCD::OPC_SoftFail: {
945 ++I;
946 OS << Indent << "MCD::OPC_SoftFail";
947 // Positive mask
948 uint64_t Value = 0;
949 unsigned Shift = 0;
950 do {
951 OS << ", " << (unsigned)*I;
952 Value += ((uint64_t)(*I & 0x7f)) << Shift;
953 Shift += 7;
954 } while (*I++ >= 128);
955 if (Value > 127) {
956 OS << " /* 0x";
957 OS.write_hex(Value);
958 OS << " */";
960 // Negative mask
961 Value = 0;
962 Shift = 0;
963 do {
964 OS << ", " << (unsigned)*I;
965 Value += ((uint64_t)(*I & 0x7f)) << Shift;
966 Shift += 7;
967 } while (*I++ >= 128);
968 if (Value > 127) {
969 OS << " /* 0x";
970 OS.write_hex(Value);
971 OS << " */";
973 OS << ",\n";
974 break;
976 case MCD::OPC_Fail: {
977 ++I;
978 OS << Indent << "MCD::OPC_Fail,\n";
979 break;
983 OS << Indent << "0\n";
985 Indent -= 2;
987 OS << Indent << "};\n\n";
990 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
991 std::vector<unsigned> &InstrLen) const {
992 OS << "static const uint8_t InstrLenTable[] = {\n";
993 for (unsigned &Len : InstrLen) {
994 OS << Len << ",\n";
996 OS << "};\n\n";
999 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
1000 PredicateSet &Predicates,
1001 indent Indent) const {
1002 // The predicate function is just a big switch statement based on the
1003 // input predicate index.
1004 OS << Indent << "static bool checkDecoderPredicate(unsigned Idx, "
1005 << "const FeatureBitset &Bits) {\n";
1006 Indent += 2;
1007 if (!Predicates.empty()) {
1008 OS << Indent << "switch (Idx) {\n";
1009 OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
1010 unsigned Index = 0;
1011 for (const auto &Predicate : Predicates) {
1012 OS << Indent << "case " << Index++ << ":\n";
1013 OS << Indent + 2 << "return (" << Predicate << ");\n";
1015 OS << Indent << "}\n";
1016 } else {
1017 // No case statement to emit
1018 OS << Indent << "llvm_unreachable(\"Invalid index!\");\n";
1020 Indent -= 2;
1021 OS << Indent << "}\n\n";
1024 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
1025 DecoderSet &Decoders,
1026 indent Indent) const {
1027 // The decoder function is just a big switch statement based on the
1028 // input decoder index.
1029 OS << Indent << "template <typename InsnType>\n";
1030 OS << Indent << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
1031 << " unsigned Idx, InsnType insn, MCInst &MI,\n";
1032 OS << Indent << " uint64_t "
1033 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n";
1034 Indent += 2;
1035 OS << Indent << "DecodeComplete = true;\n";
1036 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
1037 // It would be better for emitBinaryParser to use a 64-bit tmp whenever
1038 // possible but fall back to an InsnType-sized tmp for truly large fields.
1039 OS << Indent
1040 << "using TmpType = "
1041 "std::conditional_t<std::is_integral<InsnType>::"
1042 "value, InsnType, uint64_t>;\n";
1043 OS << Indent << "TmpType tmp;\n";
1044 OS << Indent << "switch (Idx) {\n";
1045 OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
1046 unsigned Index = 0;
1047 for (const auto &Decoder : Decoders) {
1048 OS << Indent << "case " << Index++ << ":\n";
1049 OS << Decoder;
1050 OS << Indent + 2 << "return S;\n";
1052 OS << Indent << "}\n";
1053 Indent -= 2;
1054 OS << Indent << "}\n";
1057 // Populates the field of the insn given the start position and the number of
1058 // consecutive bits to scan for.
1060 // Returns a pair of values (indicator, field), where the indicator is false
1061 // if there exists any uninitialized bit value in the range and true if all
1062 // bits are well-known. The second value is the potentially populated field.
1063 std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn,
1064 unsigned StartBit,
1065 unsigned NumBits) const {
1066 uint64_t Field = 0;
1068 for (unsigned i = 0; i < NumBits; ++i) {
1069 if (Insn[StartBit + i] == BIT_UNSET)
1070 return {false, Field};
1072 if (Insn[StartBit + i] == BIT_TRUE)
1073 Field = Field | (1ULL << i);
1076 return {true, Field};
1079 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
1080 /// filter array as a series of chars.
1081 void FilterChooser::dumpFilterArray(
1082 raw_ostream &OS, const std::vector<bit_value_t> &filter) const {
1083 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
1084 switch (filter[bitIndex - 1]) {
1085 case BIT_UNFILTERED:
1086 OS << ".";
1087 break;
1088 case BIT_UNSET:
1089 OS << "_";
1090 break;
1091 case BIT_TRUE:
1092 OS << "1";
1093 break;
1094 case BIT_FALSE:
1095 OS << "0";
1096 break;
1101 /// dumpStack - dumpStack traverses the filter chooser chain and calls
1102 /// dumpFilterArray on each filter chooser up to the top level one.
1103 void FilterChooser::dumpStack(raw_ostream &OS, const char *prefix) const {
1104 const FilterChooser *current = this;
1106 while (current) {
1107 OS << prefix;
1108 dumpFilterArray(OS, current->FilterBitValues);
1109 OS << '\n';
1110 current = current->Parent;
1114 // Calculates the island(s) needed to decode the instruction.
1115 // This returns a list of undecoded bits of an instructions, for example,
1116 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
1117 // decoded bits in order to verify that the instruction matches the Opcode.
1118 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
1119 std::vector<unsigned> &EndBits,
1120 std::vector<uint64_t> &FieldVals,
1121 const insn_t &Insn) const {
1122 unsigned Num, BitNo;
1123 Num = BitNo = 0;
1125 uint64_t FieldVal = 0;
1127 // 0: Init
1128 // 1: Water (the bit value does not affect decoding)
1129 // 2: Island (well-known bit value needed for decoding)
1130 int State = 0;
1132 for (unsigned i = 0; i < BitWidth; ++i) {
1133 int64_t Val = Value(Insn[i]);
1134 bool Filtered = PositionFiltered(i);
1135 switch (State) {
1136 default:
1137 llvm_unreachable("Unreachable code!");
1138 case 0:
1139 case 1:
1140 if (Filtered || Val == -1)
1141 State = 1; // Still in Water
1142 else {
1143 State = 2; // Into the Island
1144 BitNo = 0;
1145 StartBits.push_back(i);
1146 FieldVal = Val;
1148 break;
1149 case 2:
1150 if (Filtered || Val == -1) {
1151 State = 1; // Into the Water
1152 EndBits.push_back(i - 1);
1153 FieldVals.push_back(FieldVal);
1154 ++Num;
1155 } else {
1156 State = 2; // Still in Island
1157 ++BitNo;
1158 FieldVal = FieldVal | Val << BitNo;
1160 break;
1163 // If we are still in Island after the loop, do some housekeeping.
1164 if (State == 2) {
1165 EndBits.push_back(BitWidth - 1);
1166 FieldVals.push_back(FieldVal);
1167 ++Num;
1170 assert(StartBits.size() == Num && EndBits.size() == Num &&
1171 FieldVals.size() == Num);
1172 return Num;
1175 void FilterChooser::emitBinaryParser(raw_ostream &OS, indent Indent,
1176 const OperandInfo &OpInfo,
1177 bool &OpHasCompleteDecoder) const {
1178 const std::string &Decoder = OpInfo.Decoder;
1180 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0;
1182 if (UseInsertBits) {
1183 OS << Indent << "tmp = 0x";
1184 OS.write_hex(OpInfo.InitValue);
1185 OS << ";\n";
1188 for (const EncodingField &EF : OpInfo) {
1189 OS << Indent;
1190 if (UseInsertBits)
1191 OS << "insertBits(tmp, ";
1192 else
1193 OS << "tmp = ";
1194 OS << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')';
1195 if (UseInsertBits)
1196 OS << ", " << EF.Offset << ", " << EF.Width << ')';
1197 else if (EF.Offset != 0)
1198 OS << " << " << EF.Offset;
1199 OS << ";\n";
1202 if (Decoder != "") {
1203 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
1204 OS << Indent << "if (!Check(S, " << Decoder
1205 << "(MI, tmp, Address, Decoder))) { "
1206 << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ")
1207 << "return MCDisassembler::Fail; }\n";
1208 } else {
1209 OpHasCompleteDecoder = true;
1210 OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
1214 void FilterChooser::emitDecoder(raw_ostream &OS, indent Indent, unsigned Opc,
1215 bool &HasCompleteDecoder) const {
1216 HasCompleteDecoder = true;
1218 for (const auto &Op : Operands.find(Opc)->second) {
1219 // If a custom instruction decoder was specified, use that.
1220 if (Op.numFields() == 0 && !Op.Decoder.empty()) {
1221 HasCompleteDecoder = Op.HasCompleteDecoder;
1222 OS << Indent << "if (!Check(S, " << Op.Decoder
1223 << "(MI, insn, Address, Decoder))) { "
1224 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
1225 << "return MCDisassembler::Fail; }\n";
1226 break;
1229 bool OpHasCompleteDecoder;
1230 emitBinaryParser(OS, Indent, Op, OpHasCompleteDecoder);
1231 if (!OpHasCompleteDecoder)
1232 HasCompleteDecoder = false;
1236 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
1237 bool &HasCompleteDecoder) const {
1238 // Build up the predicate string.
1239 SmallString<256> Decoder;
1240 // FIXME: emitDecoder() function can take a buffer directly rather than
1241 // a stream.
1242 raw_svector_ostream S(Decoder);
1243 emitDecoder(S, indent(4), Opc, HasCompleteDecoder);
1245 // Using the full decoder string as the key value here is a bit
1246 // heavyweight, but is effective. If the string comparisons become a
1247 // performance concern, we can implement a mangling of the predicate
1248 // data easily enough with a map back to the actual string. That's
1249 // overkill for now, though.
1251 // Make sure the predicate is in the table.
1252 Decoders.insert(CachedHashString(Decoder));
1253 // Now figure out the index for when we write out the table.
1254 DecoderSet::const_iterator P = find(Decoders, Decoder.str());
1255 return (unsigned)(P - Decoders.begin());
1258 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
1259 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
1260 raw_ostream &OS) const {
1261 if (const auto *D = dyn_cast<DefInit>(&Val)) {
1262 if (!D->getDef()->isSubClassOf("SubtargetFeature"))
1263 return true;
1264 OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
1265 << "]";
1266 return false;
1268 if (const auto *D = dyn_cast<DagInit>(&Val)) {
1269 std::string Op = D->getOperator()->getAsString();
1270 if (Op == "not" && D->getNumArgs() == 1) {
1271 OS << '!';
1272 return emitPredicateMatchAux(*D->getArg(0), true, OS);
1274 if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
1275 bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
1276 if (Paren)
1277 OS << '(';
1278 ListSeparator LS(Op == "any_of" ? " || " : " && ");
1279 for (auto *Arg : D->getArgs()) {
1280 OS << LS;
1281 if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS))
1282 return true;
1284 if (Paren)
1285 OS << ')';
1286 return false;
1289 return true;
1292 bool FilterChooser::emitPredicateMatch(raw_ostream &OS, unsigned Opc) const {
1293 const ListInit *Predicates =
1294 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
1295 bool IsFirstEmission = true;
1296 for (unsigned i = 0; i < Predicates->size(); ++i) {
1297 const Record *Pred = Predicates->getElementAsRecord(i);
1298 if (!Pred->getValue("AssemblerMatcherPredicate"))
1299 continue;
1301 if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
1302 continue;
1304 if (!IsFirstEmission)
1305 OS << " && ";
1306 if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"),
1307 Predicates->size() > 1, OS))
1308 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
1309 IsFirstEmission = false;
1311 return !Predicates->empty();
1314 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
1315 const ListInit *Predicates =
1316 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
1317 for (unsigned i = 0; i < Predicates->size(); ++i) {
1318 const Record *Pred = Predicates->getElementAsRecord(i);
1319 if (!Pred->getValue("AssemblerMatcherPredicate"))
1320 continue;
1322 if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
1323 return true;
1325 return false;
1328 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
1329 StringRef Predicate) const {
1330 // Using the full predicate string as the key value here is a bit
1331 // heavyweight, but is effective. If the string comparisons become a
1332 // performance concern, we can implement a mangling of the predicate
1333 // data easily enough with a map back to the actual string. That's
1334 // overkill for now, though.
1336 // Make sure the predicate is in the table.
1337 TableInfo.Predicates.insert(CachedHashString(Predicate));
1338 // Now figure out the index for when we write out the table.
1339 PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate);
1340 return (unsigned)(P - TableInfo.Predicates.begin());
1343 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
1344 unsigned Opc) const {
1345 if (!doesOpcodeNeedPredicate(Opc))
1346 return;
1348 // Build up the predicate string.
1349 SmallString<256> Predicate;
1350 // FIXME: emitPredicateMatch() functions can take a buffer directly rather
1351 // than a stream.
1352 raw_svector_ostream PS(Predicate);
1353 emitPredicateMatch(PS, Opc);
1355 // Figure out the index into the predicate table for the predicate just
1356 // computed.
1357 unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
1358 SmallString<16> PBytes;
1359 raw_svector_ostream S(PBytes);
1360 encodeULEB128(PIdx, S);
1362 TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
1363 // Predicate index.
1364 for (const auto PB : PBytes)
1365 TableInfo.Table.push_back(PB);
1366 // Push location for NumToSkip backpatching.
1367 TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
1368 TableInfo.Table.push_back(0);
1369 TableInfo.Table.push_back(0);
1370 TableInfo.Table.push_back(0);
1373 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
1374 unsigned Opc) const {
1375 const Record *EncodingDef = AllInstructions[Opc].EncodingDef;
1376 const RecordVal *RV = EncodingDef->getValue("SoftFail");
1377 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
1379 if (!SFBits)
1380 return;
1381 const BitsInit *InstBits = EncodingDef->getValueAsBitsInit("Inst");
1383 APInt PositiveMask(BitWidth, 0ULL);
1384 APInt NegativeMask(BitWidth, 0ULL);
1385 for (unsigned i = 0; i < BitWidth; ++i) {
1386 bit_value_t B = bitFromBits(*SFBits, i);
1387 bit_value_t IB = bitFromBits(*InstBits, i);
1389 if (B != BIT_TRUE)
1390 continue;
1392 switch (IB) {
1393 case BIT_FALSE:
1394 // The bit is meant to be false, so emit a check to see if it is true.
1395 PositiveMask.setBit(i);
1396 break;
1397 case BIT_TRUE:
1398 // The bit is meant to be true, so emit a check to see if it is false.
1399 NegativeMask.setBit(i);
1400 break;
1401 default:
1402 // The bit is not set; this must be an error!
1403 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
1404 << AllInstructions[Opc] << " is set but Inst{" << i
1405 << "} is unset!\n"
1406 << " - You can only mark a bit as SoftFail if it is fully defined"
1407 << " (1/0 - not '?') in Inst\n";
1408 return;
1412 bool NeedPositiveMask = PositiveMask.getBoolValue();
1413 bool NeedNegativeMask = NegativeMask.getBoolValue();
1415 if (!NeedPositiveMask && !NeedNegativeMask)
1416 return;
1418 TableInfo.Table.push_back(MCD::OPC_SoftFail);
1420 SmallString<16> MaskBytes;
1421 raw_svector_ostream S(MaskBytes);
1422 if (NeedPositiveMask) {
1423 encodeULEB128(PositiveMask.getZExtValue(), S);
1424 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
1425 TableInfo.Table.push_back(MaskBytes[i]);
1426 } else
1427 TableInfo.Table.push_back(0);
1428 if (NeedNegativeMask) {
1429 MaskBytes.clear();
1430 encodeULEB128(NegativeMask.getZExtValue(), S);
1431 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
1432 TableInfo.Table.push_back(MaskBytes[i]);
1433 } else
1434 TableInfo.Table.push_back(0);
1437 // Emits table entries to decode the singleton.
1438 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1439 EncodingIDAndOpcode Opc) const {
1440 std::vector<unsigned> StartBits;
1441 std::vector<unsigned> EndBits;
1442 std::vector<uint64_t> FieldVals;
1443 insn_t Insn;
1444 insnWithID(Insn, Opc.EncodingID);
1446 // Look for islands of undecoded bits of the singleton.
1447 getIslands(StartBits, EndBits, FieldVals, Insn);
1449 unsigned Size = StartBits.size();
1451 // Emit the predicate table entry if one is needed.
1452 emitPredicateTableEntry(TableInfo, Opc.EncodingID);
1454 // Check any additional encoding fields needed.
1455 for (unsigned I = Size; I != 0; --I) {
1456 unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1;
1457 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
1458 TableInfo.Table.push_back(MCD::OPC_CheckField);
1459 uint8_t Buffer[16], *P;
1460 encodeULEB128(StartBits[I - 1], Buffer);
1461 for (P = Buffer; *P >= 128; ++P)
1462 TableInfo.Table.push_back(*P);
1463 TableInfo.Table.push_back(*P);
1464 TableInfo.Table.push_back(NumBits);
1465 encodeULEB128(FieldVals[I - 1], Buffer);
1466 for (P = Buffer; *P >= 128; ++P)
1467 TableInfo.Table.push_back(*P);
1468 TableInfo.Table.push_back(*P);
1469 // Push location for NumToSkip backpatching.
1470 TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
1471 // The fixup is always 24-bits, so go ahead and allocate the space
1472 // in the table so all our relative position calculations work OK even
1473 // before we fully resolve the real value here.
1474 TableInfo.Table.push_back(0);
1475 TableInfo.Table.push_back(0);
1476 TableInfo.Table.push_back(0);
1479 // Check for soft failure of the match.
1480 emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
1482 bool HasCompleteDecoder;
1483 unsigned DIdx =
1484 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);
1486 // Produce OPC_Decode or OPC_TryDecode opcode based on the information
1487 // whether the instruction decoder is complete or not. If it is complete
1488 // then it handles all possible values of remaining variable/unfiltered bits
1489 // and for any value can determine if the bitpattern is a valid instruction
1490 // or not. This means OPC_Decode will be the final step in the decoding
1491 // process. If it is not complete, then the Fail return code from the
1492 // decoder method indicates that additional processing should be done to see
1493 // if there is any other instruction that also matches the bitpattern and
1494 // can decode it.
1495 TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode
1496 : MCD::OPC_TryDecode);
1497 NumEncodingsSupported++;
1498 uint8_t Buffer[16], *p;
1499 encodeULEB128(Opc.Opcode, Buffer);
1500 for (p = Buffer; *p >= 128; ++p)
1501 TableInfo.Table.push_back(*p);
1502 TableInfo.Table.push_back(*p);
1504 SmallString<16> Bytes;
1505 raw_svector_ostream S(Bytes);
1506 encodeULEB128(DIdx, S);
1508 // Decoder index.
1509 for (const auto B : Bytes)
1510 TableInfo.Table.push_back(B);
1512 if (!HasCompleteDecoder) {
1513 // Push location for NumToSkip backpatching.
1514 TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
1515 // Allocate the space for the fixup.
1516 TableInfo.Table.push_back(0);
1517 TableInfo.Table.push_back(0);
1518 TableInfo.Table.push_back(0);
1522 // Emits table entries to decode the singleton, and then to decode the rest.
1523 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1524 const Filter &Best) const {
1525 EncodingIDAndOpcode Opc = Best.getSingletonOpc();
1527 // complex singletons need predicate checks from the first singleton
1528 // to refer forward to the variable filterchooser that follows.
1529 TableInfo.FixupStack.emplace_back();
1531 emitSingletonTableEntry(TableInfo, Opc);
1533 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
1534 TableInfo.Table.size());
1535 TableInfo.FixupStack.pop_back();
1537 Best.getVariableFC().emitTableEntries(TableInfo);
1540 // Assign a single filter and run with it. Top level API client can initialize
1541 // with a single filter to start the filtering process.
1542 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
1543 bool mixed) {
1544 Filters.clear();
1545 Filters.emplace_back(*this, startBit, numBit, true);
1546 BestIndex = 0; // Sole Filter instance to choose from.
1547 bestFilter().recurse();
1550 // reportRegion is a helper function for filterProcessor to mark a region as
1551 // eligible for use as a filter region.
1552 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
1553 unsigned BitIndex, bool AllowMixed) {
1554 if (RA == ATTR_MIXED && AllowMixed)
1555 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true);
1556 else if (RA == ATTR_ALL_SET && !AllowMixed)
1557 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false);
1560 // FilterProcessor scans the well-known encoding bits of the instructions and
1561 // builds up a list of candidate filters. It chooses the best filter and
1562 // recursively descends down the decoding tree.
1563 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
1564 Filters.clear();
1565 BestIndex = -1;
1566 unsigned numInstructions = Opcodes.size();
1568 assert(numInstructions && "Filter created with no instructions");
1570 // No further filtering is necessary.
1571 if (numInstructions == 1)
1572 return true;
1574 // Heuristics. See also doFilter()'s "Heuristics" comment when num of
1575 // instructions is 3.
1576 if (AllowMixed && !Greedy) {
1577 assert(numInstructions == 3);
1579 for (const auto &Opcode : Opcodes) {
1580 std::vector<unsigned> StartBits;
1581 std::vector<unsigned> EndBits;
1582 std::vector<uint64_t> FieldVals;
1583 insn_t Insn;
1585 insnWithID(Insn, Opcode.EncodingID);
1587 // Look for islands of undecoded bits of any instruction.
1588 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
1589 // Found an instruction with island(s). Now just assign a filter.
1590 runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true);
1591 return true;
1596 unsigned BitIndex;
1598 // We maintain BIT_WIDTH copies of the bitAttrs automaton.
1599 // The automaton consumes the corresponding bit from each
1600 // instruction.
1602 // Input symbols: 0, 1, and _ (unset).
1603 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
1604 // Initial state: NONE.
1606 // (NONE) ------- [01] -> (ALL_SET)
1607 // (NONE) ------- _ ----> (ALL_UNSET)
1608 // (ALL_SET) ---- [01] -> (ALL_SET)
1609 // (ALL_SET) ---- _ ----> (MIXED)
1610 // (ALL_UNSET) -- [01] -> (MIXED)
1611 // (ALL_UNSET) -- _ ----> (ALL_UNSET)
1612 // (MIXED) ------ . ----> (MIXED)
1613 // (FILTERED)---- . ----> (FILTERED)
1615 std::vector<bitAttr_t> bitAttrs;
1617 // FILTERED bit positions provide no entropy and are not worthy of pursuing.
1618 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
1619 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
1620 if (FilterBitValues[BitIndex] == BIT_TRUE ||
1621 FilterBitValues[BitIndex] == BIT_FALSE)
1622 bitAttrs.push_back(ATTR_FILTERED);
1623 else
1624 bitAttrs.push_back(ATTR_NONE);
1626 for (const auto &OpcPair : Opcodes) {
1627 insn_t insn;
1629 insnWithID(insn, OpcPair.EncodingID);
1631 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1632 switch (bitAttrs[BitIndex]) {
1633 case ATTR_NONE:
1634 if (insn[BitIndex] == BIT_UNSET)
1635 bitAttrs[BitIndex] = ATTR_ALL_UNSET;
1636 else
1637 bitAttrs[BitIndex] = ATTR_ALL_SET;
1638 break;
1639 case ATTR_ALL_SET:
1640 if (insn[BitIndex] == BIT_UNSET)
1641 bitAttrs[BitIndex] = ATTR_MIXED;
1642 break;
1643 case ATTR_ALL_UNSET:
1644 if (insn[BitIndex] != BIT_UNSET)
1645 bitAttrs[BitIndex] = ATTR_MIXED;
1646 break;
1647 case ATTR_MIXED:
1648 case ATTR_FILTERED:
1649 break;
1654 // The regionAttr automaton consumes the bitAttrs automatons' state,
1655 // lowest-to-highest.
1657 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
1658 // States: NONE, ALL_SET, MIXED
1659 // Initial state: NONE
1661 // (NONE) ----- F --> (NONE)
1662 // (NONE) ----- S --> (ALL_SET) ; and set region start
1663 // (NONE) ----- U --> (NONE)
1664 // (NONE) ----- M --> (MIXED) ; and set region start
1665 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
1666 // (ALL_SET) -- S --> (ALL_SET)
1667 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
1668 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
1669 // (MIXED) ---- F --> (NONE) ; and report a MIXED region
1670 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
1671 // (MIXED) ---- U --> (NONE) ; and report a MIXED region
1672 // (MIXED) ---- M --> (MIXED)
1674 bitAttr_t RA = ATTR_NONE;
1675 unsigned StartBit = 0;
1677 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1678 bitAttr_t bitAttr = bitAttrs[BitIndex];
1680 assert(bitAttr != ATTR_NONE && "Bit without attributes");
1682 switch (RA) {
1683 case ATTR_NONE:
1684 switch (bitAttr) {
1685 case ATTR_FILTERED:
1686 break;
1687 case ATTR_ALL_SET:
1688 StartBit = BitIndex;
1689 RA = ATTR_ALL_SET;
1690 break;
1691 case ATTR_ALL_UNSET:
1692 break;
1693 case ATTR_MIXED:
1694 StartBit = BitIndex;
1695 RA = ATTR_MIXED;
1696 break;
1697 default:
1698 llvm_unreachable("Unexpected bitAttr!");
1700 break;
1701 case ATTR_ALL_SET:
1702 switch (bitAttr) {
1703 case ATTR_FILTERED:
1704 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1705 RA = ATTR_NONE;
1706 break;
1707 case ATTR_ALL_SET:
1708 break;
1709 case ATTR_ALL_UNSET:
1710 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1711 RA = ATTR_NONE;
1712 break;
1713 case ATTR_MIXED:
1714 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1715 StartBit = BitIndex;
1716 RA = ATTR_MIXED;
1717 break;
1718 default:
1719 llvm_unreachable("Unexpected bitAttr!");
1721 break;
1722 case ATTR_MIXED:
1723 switch (bitAttr) {
1724 case ATTR_FILTERED:
1725 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1726 StartBit = BitIndex;
1727 RA = ATTR_NONE;
1728 break;
1729 case ATTR_ALL_SET:
1730 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1731 StartBit = BitIndex;
1732 RA = ATTR_ALL_SET;
1733 break;
1734 case ATTR_ALL_UNSET:
1735 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1736 RA = ATTR_NONE;
1737 break;
1738 case ATTR_MIXED:
1739 break;
1740 default:
1741 llvm_unreachable("Unexpected bitAttr!");
1743 break;
1744 case ATTR_ALL_UNSET:
1745 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
1746 case ATTR_FILTERED:
1747 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
1751 // At the end, if we're still in ALL_SET or MIXED states, report a region
1752 switch (RA) {
1753 case ATTR_NONE:
1754 break;
1755 case ATTR_FILTERED:
1756 break;
1757 case ATTR_ALL_SET:
1758 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1759 break;
1760 case ATTR_ALL_UNSET:
1761 break;
1762 case ATTR_MIXED:
1763 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1764 break;
1767 // We have finished with the filter processings. Now it's time to choose
1768 // the best performing filter.
1769 BestIndex = 0;
1770 bool AllUseless = true;
1771 unsigned BestScore = 0;
1773 for (const auto &[Idx, Filter] : enumerate(Filters)) {
1774 unsigned Usefulness = Filter.usefulness();
1776 if (Usefulness)
1777 AllUseless = false;
1779 if (Usefulness > BestScore) {
1780 BestIndex = Idx;
1781 BestScore = Usefulness;
1785 if (!AllUseless)
1786 bestFilter().recurse();
1788 return !AllUseless;
1789 } // end of FilterChooser::filterProcessor(bool)
1791 // Decides on the best configuration of filter(s) to use in order to decode
1792 // the instructions. A conflict of instructions may occur, in which case we
1793 // dump the conflict set to the standard error.
1794 void FilterChooser::doFilter() {
1795 unsigned Num = Opcodes.size();
1796 assert(Num && "FilterChooser created with no instructions");
1798 // Try regions of consecutive known bit values first.
1799 if (filterProcessor(false))
1800 return;
1802 // Then regions of mixed bits (both known and unitialized bit values allowed).
1803 if (filterProcessor(true))
1804 return;
1806 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1807 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1808 // well-known encoding pattern. In such case, we backtrack and scan for the
1809 // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1810 if (Num == 3 && filterProcessor(true, false))
1811 return;
1813 // If we come to here, the instruction decoding has failed.
1814 // Set the BestIndex to -1 to indicate so.
1815 BestIndex = -1;
1818 // emitTableEntries - Emit state machine entries to decode our share of
1819 // instructions.
1820 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
1821 if (Opcodes.size() == 1) {
1822 // There is only one instruction in the set, which is great!
1823 // Call emitSingletonDecoder() to see whether there are any remaining
1824 // encodings bits.
1825 emitSingletonTableEntry(TableInfo, Opcodes[0]);
1826 return;
1829 // Choose the best filter to do the decodings!
1830 if (BestIndex != -1) {
1831 const Filter &Best = Filters[BestIndex];
1832 if (Best.getNumFiltered() == 1)
1833 emitSingletonTableEntry(TableInfo, Best);
1834 else
1835 Best.emitTableEntry(TableInfo);
1836 return;
1839 // We don't know how to decode these instructions! Dump the
1840 // conflict set and bail.
1842 // Print out useful conflict information for postmortem analysis.
1843 errs() << "Decoding Conflict:\n";
1845 dumpStack(errs(), "\t\t");
1847 for (auto Opcode : Opcodes) {
1848 errs() << '\t';
1849 emitNameWithID(errs(), Opcode.EncodingID);
1850 errs() << " ";
1851 dumpBits(
1852 errs(),
1853 getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst"));
1854 errs() << '\n';
1858 static std::string findOperandDecoderMethod(const Record *Record) {
1859 std::string Decoder;
1861 const RecordVal *DecoderString = Record->getValue("DecoderMethod");
1862 const StringInit *String =
1863 DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
1864 if (String) {
1865 Decoder = std::string(String->getValue());
1866 if (!Decoder.empty())
1867 return Decoder;
1870 if (Record->isSubClassOf("RegisterOperand"))
1871 // Allows use of a DecoderMethod in referenced RegisterClass if set.
1872 return findOperandDecoderMethod(Record->getValueAsDef("RegClass"));
1874 if (Record->isSubClassOf("RegisterClass")) {
1875 Decoder = "Decode" + Record->getName().str() + "RegisterClass";
1876 } else if (Record->isSubClassOf("PointerLikeRegClass")) {
1877 Decoder = "DecodePointerLikeRegClass" +
1878 utostr(Record->getValueAsInt("RegClassKind"));
1881 return Decoder;
1884 OperandInfo getOpInfo(const Record *TypeRecord) {
1885 std::string Decoder = findOperandDecoderMethod(TypeRecord);
1887 const RecordVal *HasCompleteDecoderVal =
1888 TypeRecord->getValue("hasCompleteDecoder");
1889 const BitInit *HasCompleteDecoderBit =
1890 HasCompleteDecoderVal
1891 ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
1892 : nullptr;
1893 bool HasCompleteDecoder =
1894 HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
1896 return OperandInfo(std::move(Decoder), HasCompleteDecoder);
1899 static void parseVarLenInstOperand(const Record &Def,
1900 std::vector<OperandInfo> &Operands,
1901 const CodeGenInstruction &CGI) {
1903 const RecordVal *RV = Def.getValue("Inst");
1904 VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
1905 SmallVector<int> TiedTo;
1907 for (const auto &[Idx, Op] : enumerate(CGI.Operands)) {
1908 if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
1909 for (auto *Arg : Op.MIOperandInfo->getArgs())
1910 Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
1911 else
1912 Operands.push_back(getOpInfo(Op.Rec));
1914 int TiedReg = Op.getTiedRegister();
1915 TiedTo.push_back(-1);
1916 if (TiedReg != -1) {
1917 TiedTo[Idx] = TiedReg;
1918 TiedTo[TiedReg] = Idx;
1922 unsigned CurrBitPos = 0;
1923 for (const auto &EncodingSegment : VLI) {
1924 unsigned Offset = 0;
1925 StringRef OpName;
1927 if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
1928 OpName = SI->getValue();
1929 } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
1930 OpName = cast<StringInit>(DI->getArg(0))->getValue();
1931 Offset = cast<IntInit>(DI->getArg(2))->getValue();
1934 if (!OpName.empty()) {
1935 auto OpSubOpPair =
1936 const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
1937 OpName);
1938 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
1939 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
1940 if (!EncodingSegment.CustomDecoder.empty())
1941 Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();
1943 int TiedReg = TiedTo[OpSubOpPair.first];
1944 if (TiedReg != -1) {
1945 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
1946 std::pair(TiedReg, OpSubOpPair.second));
1947 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
1951 CurrBitPos += EncodingSegment.BitWidth;
1955 static void debugDumpRecord(const Record &Rec) {
1956 // Dump the record, so we can see what's going on.
1957 PrintNote([&Rec](raw_ostream &OS) {
1958 OS << "Dumping record for previous error:\n";
1959 OS << Rec;
1963 /// For an operand field named OpName: populate OpInfo.InitValue with the
1964 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
1965 /// insert from the decoded instruction.
1966 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
1967 std::map<std::string, std::string> &TiedNames,
1968 StringRef OpName, OperandInfo &OpInfo) {
1969 // Some bits of the operand may be required to be 1 depending on the
1970 // instruction's encoding. Collect those bits.
1971 if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName))
1972 if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue()))
1973 for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
1974 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
1975 if (OpBit->getValue())
1976 OpInfo.InitValue |= 1ULL << I;
1978 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) {
1979 const VarInit *Var;
1980 unsigned Offset = 0;
1981 for (; J != Bits.getNumBits(); ++J) {
1982 const VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J));
1983 if (BJ) {
1984 Var = dyn_cast<VarInit>(BJ->getBitVar());
1985 if (I == J)
1986 Offset = BJ->getBitNum();
1987 else if (BJ->getBitNum() != Offset + J - I)
1988 break;
1989 } else {
1990 Var = dyn_cast<VarInit>(Bits.getBit(J));
1992 if (!Var || (Var->getName() != OpName &&
1993 Var->getName() != TiedNames[std::string(OpName)]))
1994 break;
1996 if (I == J)
1997 ++J;
1998 else
1999 OpInfo.addField(I, J - I, Offset);
2003 static unsigned
2004 populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef,
2005 const CodeGenInstruction &CGI, unsigned Opc,
2006 std::map<unsigned, std::vector<OperandInfo>> &Operands,
2007 bool IsVarLenInst) {
2008 const Record &Def = *CGI.TheDef;
2009 // If all the bit positions are not specified; do not decode this instruction.
2010 // We are bound to fail! For proper disassembly, the well-known encoding bits
2011 // of the instruction must be fully specified.
2013 const BitsInit &Bits = getBitsField(EncodingDef, "Inst");
2014 if (Bits.allInComplete())
2015 return 0;
2017 std::vector<OperandInfo> InsnOperands;
2019 // If the instruction has specified a custom decoding hook, use that instead
2020 // of trying to auto-generate the decoder.
2021 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
2022 if (InstDecoder != "") {
2023 bool HasCompleteInstDecoder =
2024 EncodingDef.getValueAsBit("hasCompleteDecoder");
2025 InsnOperands.push_back(
2026 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder));
2027 Operands[Opc] = std::move(InsnOperands);
2028 return Bits.getNumBits();
2031 // Generate a description of the operand of the instruction that we know
2032 // how to decode automatically.
2033 // FIXME: We'll need to have a way to manually override this as needed.
2035 // Gather the outputs/inputs of the instruction, so we can find their
2036 // positions in the encoding. This assumes for now that they appear in the
2037 // MCInst in the order that they're listed.
2038 std::vector<std::pair<const Init *, StringRef>> InOutOperands;
2039 const DagInit *Out = Def.getValueAsDag("OutOperandList");
2040 const DagInit *In = Def.getValueAsDag("InOperandList");
2041 for (const auto &[Idx, Arg] : enumerate(Out->getArgs()))
2042 InOutOperands.push_back(std::pair(Arg, Out->getArgNameStr(Idx)));
2043 for (const auto &[Idx, Arg] : enumerate(In->getArgs()))
2044 InOutOperands.push_back(std::pair(Arg, In->getArgNameStr(Idx)));
2046 // Search for tied operands, so that we can correctly instantiate
2047 // operands that are not explicitly represented in the encoding.
2048 std::map<std::string, std::string> TiedNames;
2049 for (const auto &[I, Op] : enumerate(CGI.Operands)) {
2050 for (const auto &[J, CI] : enumerate(Op.Constraints)) {
2051 if (CI.isTied()) {
2052 std::pair<unsigned, unsigned> SO =
2053 CGI.Operands.getSubOperandNumber(CI.getTiedOperand());
2054 std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second];
2055 if (TiedName.empty())
2056 TiedName = CGI.Operands[SO.first].Name;
2057 std::string MyName = Op.SubOpNames[J];
2058 if (MyName.empty())
2059 MyName = Op.Name;
2061 TiedNames[MyName] = TiedName;
2062 TiedNames[TiedName] = std::move(MyName);
2067 if (IsVarLenInst) {
2068 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI);
2069 } else {
2070 // For each operand, see if we can figure out where it is encoded.
2071 for (const auto &Op : InOutOperands) {
2072 const Init *OpInit = Op.first;
2073 StringRef OpName = Op.second;
2075 // We're ready to find the instruction encoding locations for this
2076 // operand.
2078 // First, find the operand type ("OpInit"), and sub-op names
2079 // ("SubArgDag") if present.
2080 const DagInit *SubArgDag = dyn_cast<DagInit>(OpInit);
2081 if (SubArgDag)
2082 OpInit = SubArgDag->getOperator();
2083 const Record *OpTypeRec = cast<DefInit>(OpInit)->getDef();
2084 // Lookup the sub-operands from the operand type record (note that only
2085 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp).
2086 const DagInit *SubOps = OpTypeRec->isSubClassOf("Operand")
2087 ? OpTypeRec->getValueAsDag("MIOperandInfo")
2088 : nullptr;
2090 // Lookup the decoder method and construct a new OperandInfo to hold our
2091 // result.
2092 OperandInfo OpInfo = getOpInfo(OpTypeRec);
2094 // If we have named sub-operands...
2095 if (SubArgDag) {
2096 // Then there should not be a custom decoder specified on the top-level
2097 // type.
2098 if (!OpInfo.Decoder.empty()) {
2099 PrintError(EncodingDef.getLoc(),
2100 "DecoderEmitter: operand \"" + OpName + "\" has type \"" +
2101 OpInit->getAsString() +
2102 "\" with a custom DecoderMethod, but also named "
2103 "sub-operands.");
2104 continue;
2107 // Decode each of the sub-ops separately.
2108 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs());
2109 for (const auto &[I, Arg] : enumerate(SubOps->getArgs())) {
2110 StringRef SubOpName = SubArgDag->getArgNameStr(I);
2111 OperandInfo SubOpInfo = getOpInfo(cast<DefInit>(Arg)->getDef());
2113 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName,
2114 SubOpInfo);
2115 InsnOperands.push_back(std::move(SubOpInfo));
2117 continue;
2120 // Otherwise, if we have an operand with sub-operands, but they aren't
2121 // named...
2122 if (SubOps && OpInfo.Decoder.empty()) {
2123 // If it's a single sub-operand, and no custom decoder, use the decoder
2124 // from the one sub-operand.
2125 if (SubOps->getNumArgs() == 1)
2126 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef());
2128 // If we have multiple sub-ops, there'd better have a custom
2129 // decoder. (Otherwise we don't know how to populate them properly...)
2130 if (SubOps->getNumArgs() > 1) {
2131 PrintError(EncodingDef.getLoc(),
2132 "DecoderEmitter: operand \"" + OpName +
2133 "\" uses MIOperandInfo with multiple ops, but doesn't "
2134 "have a custom decoder!");
2135 debugDumpRecord(EncodingDef);
2136 continue;
2140 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo);
2141 // FIXME: it should be an error not to find a definition for a given
2142 // operand, rather than just failing to add it to the resulting
2143 // instruction! (This is a longstanding bug, which will be addressed in an
2144 // upcoming change.)
2145 if (OpInfo.numFields() > 0)
2146 InsnOperands.push_back(std::move(OpInfo));
2149 Operands[Opc] = InsnOperands;
2151 #if 0
2152 LLVM_DEBUG({
2153 // Dumps the instruction encoding bits.
2154 dumpBits(errs(), Bits);
2156 errs() << '\n';
2158 // Dumps the list of operand info.
2159 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
2160 const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
2161 const std::string &OperandName = Info.Name;
2162 const Record &OperandDef = *Info.Rec;
2164 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
2167 #endif
2169 return Bits.getNumBits();
2172 // emitFieldFromInstruction - Emit the templated helper function
2173 // fieldFromInstruction().
2174 // On Windows we make sure that this function is not inlined when
2175 // using the VS compiler. It has a bug which causes the function
2176 // to be optimized out in some circumstances. See llvm.org/pr38292
//
// Two overloads are written to OS:
//  * for integral InsnType, the field is extracted by building a mask of
//    numBits bits at startBit, and-ing it with insn and shifting right;
//  * for every other InsnType, the call is forwarded to
//    insn.extractBitsAsZExtValue(numBits, startBit).
// The text of the raw string literal below is emitted as-is; the comment
// lines inside it are part of the generated code.
2177 static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
2178 OS << R"(
2179 // Helper functions for extracting fields from encoded instructions.
2180 // InsnType must either be integral or an APInt-like object that must:
2181 // * be default-constructible and copy-constructible
2182 // * be constructible from an APInt (this can be private)
2183 // * Support insertBits(bits, startBit, numBits)
2184 // * Support extractBitsAsZExtValue(numBits, startBit)
2185 // * Support the ~, &, ==, and != operators with other objects of the same type
2186 // * Support the != and bitwise & with uint64_t
2187 // * Support put (<<) to raw_ostream&
2188 template <typename InsnType>
2189 #if defined(_MSC_VER) && !defined(__clang__)
2190 __declspec(noinline)
2191 #endif
2192 static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
2193 fieldFromInstruction(const InsnType &insn, unsigned startBit,
2194 unsigned numBits) {
2195 assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
2196 assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
2197 "Instruction field out of bounds!");
2198 InsnType fieldMask;
2199 if (numBits == sizeof(InsnType) * 8)
2200 fieldMask = (InsnType)(-1LL);
2201 else
2202 fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
2203 return (insn & fieldMask) >> startBit;
2206 template <typename InsnType>
2207 static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
2208 fieldFromInstruction(const InsnType &insn, unsigned startBit,
2209 unsigned numBits) {
2210 return insn.extractBitsAsZExtValue(numBits, startBit);
2215 // emitInsertBits - Emit the templated helper function insertBits().
//
// Two overloads are written to OS:
//  * for integral InsnType, bits is shifted left by startBit and or-ed into
//    field, after asserting that startBit + numBits fits into field;
//  * for every other InsnType, the call is forwarded to
//    field.insertBits(bits, startBit, numBits).
// The text of the raw string literal below is emitted as-is; the comment
// lines inside it are part of the generated code.
2216 static void emitInsertBits(formatted_raw_ostream &OS) {
2217 OS << R"(
2218 // Helper function for inserting bits extracted from an encoded instruction into
2219 // a field.
2220 template <typename InsnType>
2221 static std::enable_if_t<std::is_integral<InsnType>::value>
2222 insertBits(InsnType &field, InsnType bits, unsigned startBit, unsigned numBits) {
2223 assert(startBit + numBits <= sizeof field * 8);
2224 field |= (InsnType)bits << startBit;
2227 template <typename InsnType>
2228 static std::enable_if_t<!std::is_integral<InsnType>::value>
2229 insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits) {
2230 field.insertBits(bits, startBit, numBits);
2235 // emitDecodeInstruction - Emit the templated helper function
2236 // decodeInstruction().
//
// The generated function walks DecodeTable one opcode at a time and handles
// OPC_ExtractField, OPC_FilterValue, OPC_CheckField, OPC_CheckPredicate,
// OPC_Decode, OPC_TryDecode, OPC_SoftFail and OPC_Fail; any other opcode
// makes it report an error and return MCDisassembler::Fail.
// Each NumToSkip operand is read from three consecutive table bytes, least
// significant byte first.
//
// IsVarLenInst selects the variant for variable-length encodings: the
// generated function then takes an additional makeUp callback and calls it
// before fields are extracted and before operands are decoded.
//
// The generated text is assembled from several raw string literals; the
// comment lines inside them are part of the generated code.
2237 static void emitDecodeInstruction(formatted_raw_ostream &OS,
2238 bool IsVarLenInst) {
2239 OS << R"(
2240 template <typename InsnType>
2241 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
2242 InsnType insn, uint64_t Address,
2243 const MCDisassembler *DisAsm,
2244 const MCSubtargetInfo &STI)";
// Variable-length decoders get the makeUp callback as a trailing parameter.
2245 if (IsVarLenInst) {
2246 OS << ",\n "
2247 "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
2249 OS << R"() {
2250 const FeatureBitset &Bits = STI.getFeatureBits();
2252 const uint8_t *Ptr = DecodeTable;
2253 uint64_t CurFieldValue = 0;
2254 DecodeStatus S = MCDisassembler::Success;
2255 while (true) {
2256 ptrdiff_t Loc = Ptr - DecodeTable;
2257 switch (*Ptr) {
2258 default:
2259 errs() << Loc << ": Unexpected decode table opcode!\n";
2260 return MCDisassembler::Fail;
2261 case MCD::OPC_ExtractField: {
2262 // Decode the start value.
2263 unsigned Start = decodeULEB128AndIncUnsafe(++Ptr);
2264 unsigned Len = *Ptr++;)";
// Variable-length decoders call makeUp(insn, Start + Len) ahead of the field
// extraction (presumably so insn covers the requested bits - confirm against
// the target's makeUp implementation).
2265 if (IsVarLenInst)
2266 OS << "\n makeUp(insn, Start + Len);";
2267 OS << R"(
2268 CurFieldValue = fieldFromInstruction(insn, Start, Len);
2269 LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
2270 << Len << "): " << CurFieldValue << "\n");
2271 break;
2273 case MCD::OPC_FilterValue: {
2274 // Decode the field value.
2275 uint64_t Val = decodeULEB128AndIncUnsafe(++Ptr);
2276 // NumToSkip is a plain 24-bit integer.
2277 unsigned NumToSkip = *Ptr++;
2278 NumToSkip |= (*Ptr++) << 8;
2279 NumToSkip |= (*Ptr++) << 16;
2281 // Perform the filter operation.
2282 if (Val != CurFieldValue)
2283 Ptr += NumToSkip;
2284 LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ", " << NumToSkip
2285 << "): " << ((Val != CurFieldValue) ? "FAIL:" : "PASS:")
2286 << " continuing at " << (Ptr - DecodeTable) << "\n");
2288 break;
2290 case MCD::OPC_CheckField: {
2291 // Decode the start value.
2292 unsigned Start = decodeULEB128AndIncUnsafe(++Ptr);
2293 unsigned Len = *Ptr;)";
// Same makeUp call as for OPC_ExtractField, emitted for variable-length
// decoders only.
2294 if (IsVarLenInst)
2295 OS << "\n makeUp(insn, Start + Len);";
2296 OS << R"(
2297 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
2298 // Decode the field value.
2299 unsigned PtrLen = 0;
2300 uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
2301 Ptr += PtrLen;
2302 // NumToSkip is a plain 24-bit integer.
2303 unsigned NumToSkip = *Ptr++;
2304 NumToSkip |= (*Ptr++) << 8;
2305 NumToSkip |= (*Ptr++) << 16;
2307 // If the actual and expected values don't match, skip.
2308 if (ExpectedValue != FieldValue)
2309 Ptr += NumToSkip;
2310 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", "
2311 << Len << ", " << ExpectedValue << ", " << NumToSkip
2312 << "): FieldValue = " << FieldValue << ", ExpectedValue = "
2313 << ExpectedValue << ": "
2314 << ((ExpectedValue == FieldValue) ? "PASS\n" : "FAIL\n"));
2315 break;
2317 case MCD::OPC_CheckPredicate: {
2318 // Decode the Predicate Index value.
2319 unsigned PIdx = decodeULEB128AndIncUnsafe(++Ptr);
2320 // NumToSkip is a plain 24-bit integer.
2321 unsigned NumToSkip = *Ptr++;
2322 NumToSkip |= (*Ptr++) << 8;
2323 NumToSkip |= (*Ptr++) << 16;
2324 // Check the predicate.
2325 bool Pred;
2326 if (!(Pred = checkDecoderPredicate(PIdx, Bits)))
2327 Ptr += NumToSkip;
2328 (void)Pred;
2329 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): "
2330 << (Pred ? "PASS\n" : "FAIL\n"));
2332 break;
2334 case MCD::OPC_Decode: {
2335 // Decode the Opcode value.
2336 unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr);
2337 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2339 MI.clear();
2340 MI.setOpcode(Opc);
2341 bool DecodeComplete;)";
// Variable-length decoders read the length from InstrLenTable[Opc] and pass
// it to makeUp before the operands are decoded.
2342 if (IsVarLenInst) {
2343 OS << "\n unsigned Len = InstrLenTable[Opc];\n"
2344 << " makeUp(insn, Len);";
2346 OS << R"(
2347 S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete);
2348 assert(DecodeComplete);
2350 LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
2351 << ", using decoder " << DecodeIdx << ": "
2352 << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n");
2353 return S;
2355 case MCD::OPC_TryDecode: {
2356 // Decode the Opcode value.
2357 unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr);
2358 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2359 // NumToSkip is a plain 24-bit integer.
2360 unsigned NumToSkip = *Ptr++;
2361 NumToSkip |= (*Ptr++) << 8;
2362 NumToSkip |= (*Ptr++) << 16;
2364 // Perform the decode operation.
2365 MCInst TmpMI;
2366 TmpMI.setOpcode(Opc);
2367 bool DecodeComplete;
2368 S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete);
2369 LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
2370 << ", using decoder " << DecodeIdx << ": ");
2372 if (DecodeComplete) {
2373 // Decoding complete.
2374 LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n");
2375 MI = TmpMI;
2376 return S;
2377 } else {
2378 assert(S == MCDisassembler::Fail);
2379 // If the decoding was incomplete, skip.
2380 Ptr += NumToSkip;
2381 LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
2382 // Reset decode status. This also drops a SoftFail status that could be
2383 // set before the decode attempt.
2384 S = MCDisassembler::Success;
2386 break;
2388 case MCD::OPC_SoftFail: {
2389 // Decode the mask values.
2390 uint64_t PositiveMask = decodeULEB128AndIncUnsafe(++Ptr);
2391 uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
2392 bool Fail = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
2393 if (Fail)
2394 S = MCDisassembler::SoftFail;
2395 LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Fail ? "FAIL\n" : "PASS\n"));
2396 break;
2398 case MCD::OPC_Fail: {
2399 LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n");
2400 return MCDisassembler::Fail;
2404 llvm_unreachable("bogosity detected in disassembler state machine!");
// emitCheck - Emit the static helper function Check() to OS.
//
// Description of the emitted function:
2410 // Helper to propagate SoftFail status. Returns false if the status is Fail;
2411 // callers are expected to early-exit in that condition. (Note, the '&' operator
2412 // is correct to propagate the values of this enum; see comment on 'enum
2413 // DecodeStatus'.)
// Check() combines In into Out with a bitwise and, stores the result in Out
// and returns whether that result differs from MCDisassembler::Fail.
2414 static void emitCheck(formatted_raw_ostream &OS) {
2415 OS << R"(
2416 static bool Check(DecodeStatus &Out, DecodeStatus In) {
2417 Out = static_cast<DecodeStatus>(Out & In);
2418 return Out != MCDisassembler::Fail;
2424 // Collect all HwModes referenced by the target for encoding purposes,
2425 // returning a vector of corresponding names.
2426 static void collectHwModesReferencedForEncodings(
2427 const CodeGenHwModes &HWM, std::vector<StringRef> &Names,
2428 NamespacesHwModesMap &NamespacesWithHwModes) {
2429 SmallBitVector BV(HWM.getNumModeIds());
2430 for (const auto &MS : HWM.getHwModeSelects()) {
2431 for (const HwModeSelect::PairType &P : MS.second.Items) {
2432 if (P.second->isSubClassOf("InstructionEncoding")) {
2433 std::string DecoderNamespace =
2434 std::string(P.second->getValueAsString("DecoderNamespace"));
2435 if (P.first == DefaultMode) {
2436 NamespacesWithHwModes[DecoderNamespace].insert("");
2437 } else {
2438 NamespacesWithHwModes[DecoderNamespace].insert(
2439 HWM.getMode(P.first).Name);
2441 BV.set(P.first);
2445 transform(BV.set_bits(), std::back_inserter(Names), [&HWM](const int &M) {
2446 if (M == DefaultMode)
2447 return StringRef("");
2448 return HWM.getModeName(M, /*IncludeDefault=*/true);
2452 static void
2453 handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
2454 const std::vector<StringRef> &HwModeNames,
2455 NamespacesHwModesMap &NamespacesWithHwModes,
2456 std::vector<EncodingAndInst> &GlobalEncodings) {
2457 const Record *InstDef = Instr->TheDef;
2459 switch (DecoderEmitterSuppressDuplicates) {
2460 case SUPPRESSION_DISABLE: {
2461 for (StringRef HwModeName : HwModeNames)
2462 GlobalEncodings.emplace_back(InstDef, Instr, HwModeName);
2463 break;
2465 case SUPPRESSION_LEVEL1: {
2466 std::string DecoderNamespace =
2467 std::string(InstDef->getValueAsString("DecoderNamespace"));
2468 auto It = NamespacesWithHwModes.find(DecoderNamespace);
2469 if (It != NamespacesWithHwModes.end()) {
2470 for (StringRef HwModeName : It->second)
2471 GlobalEncodings.emplace_back(InstDef, Instr, HwModeName);
2472 } else {
2473 // Only emit the encoding once, as it's DecoderNamespace doesn't
2474 // contain any HwModes.
2475 GlobalEncodings.emplace_back(InstDef, Instr, "");
2477 break;
2479 case SUPPRESSION_LEVEL2:
2480 GlobalEncodings.emplace_back(InstDef, Instr, "");
2481 break;
2485 // Emits disassembler code for instruction decoding.
//
// Steps, in order:
//  1. write the include/namespace prologue and the helper functions
//     (fieldFromInstruction, insertBits, Check);
//  2. build NumberedEncodings from the target's instructions (per-HwMode
//     encodings where "EncodingInfos" is set) and from all
//     "AdditionalEncoding" records;
//  3. populate the operand information of every encoding that is not skipped
//     and group the encodings by (DecoderNamespace, Size);
//  4. emit one decoder table per group;
//  5. emit the instruction length table (variable-length targets only), the
//     predicate function, the decoder function and decodeInstruction().
2486 void DecoderEmitter::run(raw_ostream &o) {
2487 formatted_raw_ostream OS(o);
2488 OS << R"(
2489 #include "llvm/MC/MCInst.h"
2490 #include "llvm/MC/MCSubtargetInfo.h"
2491 #include "llvm/Support/DataTypes.h"
2492 #include "llvm/Support/Debug.h"
2493 #include "llvm/Support/LEB128.h"
2494 #include "llvm/Support/raw_ostream.h"
2495 #include "llvm/TargetParser/SubtargetFeature.h"
2496 #include <assert.h>
2498 namespace llvm {
2501 emitFieldFromInstruction(OS);
2502 emitInsertBits(OS);
2503 emitCheck(OS);
2505 Target.reverseBitsForLittleEndianEncoding();
2507 // Parameterize the decoders based on namespace and instruction width.
2509 // First, collect all encoding-related HwModes referenced by the target.
2510 // And establish a mapping table between DecoderNamespace and HwMode.
2511 // If HwModeNames is empty, add the empty string so we always have one HwMode.
2512 const CodeGenHwModes &HWM = Target.getHwModes();
2513 std::vector<StringRef> HwModeNames;
2514 NamespacesHwModesMap NamespacesWithHwModes;
2515 collectHwModesReferencedForEncodings(HWM, HwModeNames, NamespacesWithHwModes);
2516 if (HwModeNames.empty())
2517 HwModeNames.push_back("");
// Build the list of encodings. An instruction with an "EncodingInfos" def
// contributes one entry per HwMode of that def; every other instruction is
// handled by handleHwModesUnrelatedEncodings().
2519 const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
2520 NumberedEncodings.reserve(NumberedInstructions.size());
2521 for (const auto &NumberedInstruction : NumberedInstructions) {
2522 const Record *InstDef = NumberedInstruction->TheDef;
2523 if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) {
2524 if (const DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
2525 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
2526 for (auto &[ModeId, Encoding] : EBM) {
2527 // DecoderTables with DefaultMode should not have any suffix.
2528 if (ModeId == DefaultMode) {
2529 NumberedEncodings.emplace_back(Encoding, NumberedInstruction, "");
2530 } else {
2531 NumberedEncodings.emplace_back(Encoding, NumberedInstruction,
2532 HWM.getMode(ModeId).Name);
2535 continue;
2538 // This instruction is encoded the same on all HwModes.
2539 // According to user needs, provide varying degrees of suppression.
2540 handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames,
2541 NamespacesWithHwModes, NumberedEncodings);
// Encodings declared through AdditionalEncoding records are appended after
// the instruction encodings; each refers to its instruction via "AliasOf".
2543 for (const Record *NumberedAlias :
2544 RK.getAllDerivedDefinitions("AdditionalEncoding"))
2545 NumberedEncodings.emplace_back(
2546 NumberedAlias,
2547 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
// OpcMap groups the encodings by (decoder namespace, Size); Operands maps an
// encoding index to its operand descriptions.
2549 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
2550 OpcMap;
2551 std::map<unsigned, std::vector<OperandInfo>> Operands;
2552 std::vector<unsigned> InstrLen;
2553 bool IsVarLenInst = Target.hasVariableLengthEncodings();
2554 unsigned MaxInstLen = 0;
2556 for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) {
2557 const Record *EncodingDef = NumberedEncoding.EncodingDef;
2558 const CodeGenInstruction *Inst = NumberedEncoding.Inst;
2559 const Record *Def = Inst->TheDef;
2560 unsigned Size = EncodingDef->getValueAsInt("Size");
// Encodings of TargetOpcode, pseudo, asm-parser-only and codegen-only
// instructions get no decoder.
2561 if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
2562 Def->getValueAsBit("isPseudo") ||
2563 Def->getValueAsBit("isAsmParserOnly") ||
2564 Def->getValueAsBit("isCodeGenOnly")) {
2565 NumEncodingsLackingDisasm++;
2566 continue;
2569 if (NEI < NumberedInstructions.size())
2570 NumInstructions++;
2571 NumEncodings++;
2573 if (!Size && !IsVarLenInst)
2574 continue;
// NOTE(review): InstrLen is sized by NumberedInstructions but indexed by NEI
// below, and NEI enumerates NumberedEncodings, which can hold more entries
// (per-HwMode and AdditionalEncoding entries). Confirm that variable-length
// targets never produce an index past the end.
2576 if (IsVarLenInst)
2577 InstrLen.resize(NumberedInstructions.size(), 0);
2579 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, NEI,
2580 Operands, IsVarLenInst)) {
2581 if (IsVarLenInst) {
2582 MaxInstLen = std::max(MaxInstLen, Len);
2583 InstrLen[NEI] = Len;
// A non-empty HwMode name is appended to the decoder namespace, separated by
// an underscore.
2585 std::string DecoderNamespace =
2586 std::string(EncodingDef->getValueAsString("DecoderNamespace"));
2587 if (!NumberedEncoding.HwModeName.empty())
2588 DecoderNamespace +=
2589 std::string("_") + NumberedEncoding.HwModeName.str();
2590 OpcMap[std::pair(DecoderNamespace, Size)].emplace_back(
2591 NEI, Target.getInstrIntValue(Def));
2592 } else {
2593 NumEncodingsOmitted++;
2597 DecoderTableInfo TableInfo;
2598 for (const auto &Opc : OpcMap) {
2599 // Emit the decoder for this namespace+width combination.
2600 ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(),
2601 NumberedEncodings.size());
// The bit width handed to the FilterChooser is MaxInstLen for variable-length
// targets and 8 * Size otherwise.
2602 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
2603 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this);
2605 // The decode table is cleared for each top level decoder function. The
2606 // predicates and decoders themselves, however, are shared across all
2607 // decoders to give more opportunities for uniqueing.
2608 TableInfo.Table.clear();
2609 TableInfo.FixupStack.clear();
2610 TableInfo.Table.reserve(16384);
2611 TableInfo.FixupStack.emplace_back();
2612 FC.emitTableEntries(TableInfo);
2613 // Any NumToSkip fixups in the top level scope can resolve to the
2614 // OPC_Fail at the end of the table.
2615 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
2616 // Resolve any NumToSkip fixups in the current scope.
2617 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
2618 TableInfo.Table.size());
2619 TableInfo.FixupStack.clear();
2621 TableInfo.Table.push_back(MCD::OPC_Fail);
2623 // Print the table to the output stream.
2624 emitTable(OS, TableInfo.Table, indent(0), FC.getBitWidth(), Opc.first.first,
2625 Opc.second);
2628 // For variable instruction, we emit a instruction length table
2629 // to let the decoder know how long the instructions are.
2630 // You can see example usage in M68k's disassembler.
2631 if (IsVarLenInst)
2632 emitInstrLenTable(OS, InstrLen);
2633 // Emit the predicate function.
2634 emitPredicateFunction(OS, TableInfo.Predicates, indent(0));
2636 // Emit the decoder function.
2637 emitDecoderFunction(OS, TableInfo.Decoders, indent(0));
2639 // Emit the main entry point for the decoder, decodeInstruction().
2640 emitDecodeInstruction(OS, IsVarLenInst);
2642 OS << "\n} // end namespace llvm\n";
2645 void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS,
2646 StringRef PredicateNamespace) {
2647 DecoderEmitter(RK, PredicateNamespace).run(OS);