1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // CodeEmitterGen uses the descriptions of instructions and their fields to
10 // construct an automated code emitter: a function that, given a MachineInstr,
11 // returns the (currently, 32-bit unsigned) value of the instruction.
13 //===----------------------------------------------------------------------===//
15 #include "CodeGenInstruction.h"
16 #include "CodeGenTarget.h"
17 #include "SubtargetFeatureInfo.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/Support/Casting.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/TableGen/Record.h"
25 #include "llvm/TableGen/TableGenBackend.h"
38 class CodeEmitterGen
{
39 RecordKeeper
&Records
;
42 CodeEmitterGen(RecordKeeper
&R
) : Records(R
) {}
44 void run(raw_ostream
&o
);
47 int getVariableBit(const std::string
&VarName
, BitsInit
*BI
, int bit
);
48 std::string
getInstructionCase(Record
*R
, CodeGenTarget
&Target
);
49 void AddCodeToMergeInOperand(Record
*R
, BitsInit
*BI
,
50 const std::string
&VarName
,
52 std::set
<unsigned> &NamedOpIndices
,
53 std::string
&Case
, CodeGenTarget
&Target
);
59 // If the VarBitInit at position 'bit' matches the specified variable then
60 // return the variable bit position. Otherwise return -1.
61 int CodeEmitterGen::getVariableBit(const std::string
&VarName
,
62 BitsInit
*BI
, int bit
) {
63 if (VarBitInit
*VBI
= dyn_cast
<VarBitInit
>(BI
->getBit(bit
))) {
64 if (VarInit
*VI
= dyn_cast
<VarInit
>(VBI
->getBitVar()))
65 if (VI
->getName() == VarName
)
66 return VBI
->getBitNum();
67 } else if (VarInit
*VI
= dyn_cast
<VarInit
>(BI
->getBit(bit
))) {
68 if (VI
->getName() == VarName
)
76 AddCodeToMergeInOperand(Record
*R
, BitsInit
*BI
, const std::string
&VarName
,
78 std::set
<unsigned> &NamedOpIndices
,
79 std::string
&Case
, CodeGenTarget
&Target
) {
80 CodeGenInstruction
&CGI
= Target
.getInstruction(R
);
82 // Determine if VarName actually contributes to the Inst encoding.
83 int bit
= BI
->getNumBits()-1;
85 // Scan for a bit that this contributed to.
87 if (getVariableBit(VarName
, BI
, bit
) != -1)
93 // If we found no bits, ignore this value, otherwise emit the call to get the
97 // If the operand matches by name, reference according to that
98 // operand number. Non-matching operands are assumed to be in
101 if (CGI
.Operands
.hasOperandNamed(VarName
, OpIdx
)) {
102 // Get the machine operand number for the indicated operand.
103 OpIdx
= CGI
.Operands
[OpIdx
].MIOperandNo
;
104 assert(!CGI
.Operands
.isFlatOperandNotEmitted(OpIdx
) &&
105 "Explicitly used operand also marked as not emitted!");
107 unsigned NumberOps
= CGI
.Operands
.size();
108 /// If this operand is not supposed to be emitted by the
109 /// generated emitter, skip it.
110 while (NumberedOp
< NumberOps
&&
111 (CGI
.Operands
.isFlatOperandNotEmitted(NumberedOp
) ||
112 (!NamedOpIndices
.empty() && NamedOpIndices
.count(
113 CGI
.Operands
.getSubOperandNumber(NumberedOp
).first
)))) {
116 if (NumberedOp
>= CGI
.Operands
.back().MIOperandNo
+
117 CGI
.Operands
.back().MINumOperands
) {
118 errs() << "Too few operands in record " << R
->getName() <<
119 " (no match for variable " << VarName
<< "):\n";
127 OpIdx
= NumberedOp
++;
130 std::pair
<unsigned, unsigned> SO
= CGI
.Operands
.getSubOperandNumber(OpIdx
);
131 std::string
&EncoderMethodName
= CGI
.Operands
[SO
.first
].EncoderMethodName
;
134 Case
+= " op.clearAllBits();\n";
136 // If the source operand has a custom encoder, use it. This will
137 // get the encoding for all of the suboperands.
138 if (!EncoderMethodName
.empty()) {
139 // A custom encoder has all of the information for the
140 // sub-operands, if there are more than one, so only
141 // query the encoder once per source operand.
142 if (SO
.second
== 0) {
143 Case
+= " // op: " + VarName
+ "\n";
145 Case
+= " " + EncoderMethodName
+ "(MI, " + utostr(OpIdx
);
148 Case
+= " op = " + EncoderMethodName
+ "(MI, " + utostr(OpIdx
);
150 Case
+= ", Fixups, STI);\n";
153 Case
+= " // op: " + VarName
+ "\n";
155 Case
+= " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx
) + ")";
156 Case
+= ", op, Fixups, STI";
158 Case
+= " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx
) + ")";
159 Case
+= ", Fixups, STI";
164 // Precalculate the number of lits this variable contributes to in the
165 // operand. If there is a single lit (consecutive range of bits) we can use a
166 // destructive sequence on APInt that reduces memory allocations.
167 int numOperandLits
= 0;
168 for (int tmpBit
= bit
; tmpBit
>= 0;) {
169 int varBit
= getVariableBit(VarName
, BI
, tmpBit
);
171 // If this bit isn't from a variable, skip it.
177 // Figure out the consecutive range of bits covered by this operand, in
178 // order to generate better encoding code.
179 int beginVarBit
= varBit
;
181 for (--tmpBit
; tmpBit
>= 0;) {
182 varBit
= getVariableBit(VarName
, BI
, tmpBit
);
183 if (varBit
== -1 || varBit
!= (beginVarBit
- N
))
192 int varBit
= getVariableBit(VarName
, BI
, bit
);
194 // If this bit isn't from a variable, skip it.
200 // Figure out the consecutive range of bits covered by this operand, in
201 // order to generate better encoding code.
202 int beginInstBit
= bit
;
203 int beginVarBit
= varBit
;
205 for (--bit
; bit
>= 0;) {
206 varBit
= getVariableBit(VarName
, BI
, bit
);
207 if (varBit
== -1 || varBit
!= (beginVarBit
- N
)) break;
216 unsigned loBit
= beginVarBit
- N
+ 1;
217 unsigned hiBit
= loBit
+ N
;
218 maskStr
= "M" + itostr(bit
);
219 Case
+= " const APInt " + maskStr
+ " = APInt::getBitsSet(" +
220 itostr(BitWidth
) + ", " + itostr(loBit
) + ", " + itostr(hiBit
) +
223 uint64_t opMask
= ~(uint64_t)0 >> (64 - N
);
224 opShift
= beginVarBit
- N
+ 1;
226 maskStr
= "UINT64_C(" + utostr(opMask
) + ")";
228 opShift
= beginInstBit
- beginVarBit
;
230 if (numOperandLits
== 1) {
231 // Because Op may be an APInt, ensure all arithmetic is done in-place
232 // where possible to elide copies.
233 Case
+= " op &= " + maskStr
+ ";\n";
235 Case
+= " op <<= " + itostr(opShift
) + ";\n";
236 } else if (opShift
< 0) {
237 Case
+= " op >>= " + itostr(-opShift
) + ";\n";
239 Case
+= " Value |= op;\n";
242 Case
+= " Value |= (op & " + maskStr
+ ") << " + itostr(opShift
) +
244 } else if (opShift
< 0) {
245 Case
+= " Value |= (op & " + maskStr
+ ") >> " + itostr(-opShift
) +
248 Case
+= " Value |= (op & " + maskStr
+ ");\n";
254 std::string
CodeEmitterGen::getInstructionCase(Record
*R
,
255 CodeGenTarget
&Target
) {
257 BitsInit
*BI
= R
->getValueAsBitsInit("Inst");
258 unsigned NumberedOp
= 0;
259 std::set
<unsigned> NamedOpIndices
;
261 // Collect the set of operand indices that might correspond to named
262 // operand, and skip these when assigning operands based on position.
263 if (Target
.getInstructionSet()->
264 getValueAsBit("noNamedPositionallyEncodedOperands")) {
265 CodeGenInstruction
&CGI
= Target
.getInstruction(R
);
266 for (const RecordVal
&RV
: R
->getValues()) {
268 if (!CGI
.Operands
.hasOperandNamed(RV
.getName(), OpIdx
))
271 NamedOpIndices
.insert(OpIdx
);
275 // Loop over all of the fields in the instruction, determining which are the
276 // operands to the instruction.
277 for (const RecordVal
&RV
: R
->getValues()) {
278 // Ignore fixed fields in the record, we're looking for values like:
279 // bits<5> RST = { ?, ?, ?, ?, ? };
280 if (RV
.getPrefix() || RV
.getValue()->isComplete())
283 AddCodeToMergeInOperand(R
, BI
, RV
.getName(), NumberedOp
,
284 NamedOpIndices
, Case
, Target
);
287 StringRef PostEmitter
= R
->getValueAsString("PostEncoderMethod");
288 if (!PostEmitter
.empty()) {
291 Case
+= "(MI, Value";
300 getNameForFeatureBitset(const std::vector
<Record
*> &FeatureBitset
) {
301 std::string Name
= "CEFBS";
302 for (const auto &Feature
: FeatureBitset
)
303 Name
+= ("_" + Feature
->getName()).str();
307 static void emitInstBits(raw_ostream
&OS
, const APInt
&Bits
) {
308 for (unsigned I
= 0; I
< Bits
.getNumWords(); ++I
)
309 OS
<< ((I
> 0) ? ", " : "") << "UINT64_C(" << utostr(Bits
.getRawData()[I
])
313 void CodeEmitterGen::run(raw_ostream
&o
) {
314 CodeGenTarget
Target(Records
);
315 std::vector
<Record
*> Insts
= Records
.getAllDerivedDefinitions("Instruction");
317 // For little-endian instruction bit encodings, reverse the bit order
318 Target
.reverseBitsForLittleEndianEncoding();
320 ArrayRef
<const CodeGenInstruction
*> NumberedInstructions
=
321 Target
.getInstructionsByEnumValue();
323 // Default to something sensible in case the target doesn't define Inst.
325 for (const CodeGenInstruction
*CGI
: NumberedInstructions
) {
326 Record
*R
= CGI
->TheDef
;
327 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
328 R
->getValueAsBit("isPseudo"))
331 BitsInit
*BI
= R
->getValueAsBitsInit("Inst");
332 BitWidth
= BI
->getNumBits();
335 UseAPInt
= BitWidth
> 64;
337 // Emit function declaration
339 o
<< "void " << Target
.getName()
340 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
341 << " SmallVectorImpl<MCFixup> &Fixups,\n"
343 << " APInt &Scratch,\n"
344 << " const MCSubtargetInfo &STI) const {\n";
346 o
<< "uint64_t " << Target
.getName();
347 o
<< "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
348 << " SmallVectorImpl<MCFixup> &Fixups,\n"
349 << " const MCSubtargetInfo &STI) const {\n";
352 // Emit instruction base values
353 o
<< " static const uint64_t InstBits[] = {\n";
354 for (const CodeGenInstruction
*CGI
: NumberedInstructions
) {
355 Record
*R
= CGI
->TheDef
;
357 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
358 R
->getValueAsBit("isPseudo")) {
359 o
<< " "; emitInstBits(o
, APInt(BitWidth
, 0)); o
<< ",\n";
363 BitsInit
*BI
= R
->getValueAsBitsInit("Inst");
364 BitWidth
= BI
->getNumBits();
366 // Start by filling in fixed values.
367 APInt
Value(BitWidth
, 0);
368 for (unsigned i
= 0, e
= BI
->getNumBits(); i
!= e
; ++i
) {
369 if (BitInit
*B
= dyn_cast
<BitInit
>(BI
->getBit(e
- i
- 1)))
370 Value
|= APInt(BitWidth
, (uint64_t)B
->getValue()) << (e
- i
- 1);
373 emitInstBits(o
, Value
);
374 o
<< "," << '\t' << "// " << R
->getName() << "\n";
376 o
<< " UINT64_C(0)\n };\n";
378 // Map to accumulate all the cases.
379 std::map
<std::string
, std::vector
<std::string
>> CaseMap
;
381 // Construct all cases statement for each opcode
382 for (std::vector
<Record
*>::iterator IC
= Insts
.begin(), EC
= Insts
.end();
385 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
386 R
->getValueAsBit("isPseudo"))
388 std::string InstName
=
389 (R
->getValueAsString("Namespace") + "::" + R
->getName()).str();
390 std::string Case
= getInstructionCase(R
, Target
);
392 CaseMap
[Case
].push_back(std::move(InstName
));
395 // Emit initial function code
397 int NumWords
= APInt::getNumWords(BitWidth
);
398 int NumBytes
= (BitWidth
+ 7) / 8;
399 o
<< " const unsigned opcode = MI.getOpcode();\n"
400 << " if (Inst.getBitWidth() != " << BitWidth
<< ")\n"
401 << " Inst = Inst.zext(" << BitWidth
<< ");\n"
402 << " if (Scratch.getBitWidth() != " << BitWidth
<< ")\n"
403 << " Scratch = Scratch.zext(" << BitWidth
<< ");\n"
404 << " LoadIntFromMemory(Inst, (uint8_t*)&InstBits[opcode * " << NumWords
405 << "], " << NumBytes
<< ");\n"
406 << " APInt &Value = Inst;\n"
407 << " APInt &op = Scratch;\n"
408 << " switch (opcode) {\n";
410 o
<< " const unsigned opcode = MI.getOpcode();\n"
411 << " uint64_t Value = InstBits[opcode];\n"
412 << " uint64_t op = 0;\n"
413 << " (void)op; // suppress warning\n"
414 << " switch (opcode) {\n";
417 // Emit each case statement
418 std::map
<std::string
, std::vector
<std::string
>>::iterator IE
, EE
;
419 for (IE
= CaseMap
.begin(), EE
= CaseMap
.end(); IE
!= EE
; ++IE
) {
420 const std::string
&Case
= IE
->first
;
421 std::vector
<std::string
> &InstList
= IE
->second
;
423 for (int i
= 0, N
= InstList
.size(); i
< N
; i
++) {
425 o
<< " case " << InstList
[i
] << ":";
433 // Default case: unhandled opcode
435 << " std::string msg;\n"
436 << " raw_string_ostream Msg(msg);\n"
437 << " Msg << \"Not supported instr: \" << MI;\n"
438 << " report_fatal_error(Msg.str());\n"
440 << " return Value;\n"
443 const auto &All
= SubtargetFeatureInfo::getAll(Records
);
444 std::map
<Record
*, SubtargetFeatureInfo
, LessRecordByID
> SubtargetFeatures
;
445 SubtargetFeatures
.insert(All
.begin(), All
.end());
447 o
<< "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n"
448 << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n"
449 << "#include <sstream>\n\n";
451 // Emit the subtarget feature enumeration.
452 SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures
,
455 // Emit the name table for error messages.
456 o
<< "#ifndef NDEBUG\n";
457 SubtargetFeatureInfo::emitNameTable(SubtargetFeatures
, o
);
458 o
<< "#endif // NDEBUG\n";
460 // Emit the available features compute function.
461 SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
462 Target
.getName(), "MCCodeEmitter", "computeAvailableFeatures",
463 SubtargetFeatures
, o
);
465 std::vector
<std::vector
<Record
*>> FeatureBitsets
;
466 for (const CodeGenInstruction
*Inst
: Target
.getInstructionsByEnumValue()) {
467 FeatureBitsets
.emplace_back();
468 for (Record
*Predicate
: Inst
->TheDef
->getValueAsListOfDefs("Predicates")) {
469 const auto &I
= SubtargetFeatures
.find(Predicate
);
470 if (I
!= SubtargetFeatures
.end())
471 FeatureBitsets
.back().push_back(I
->second
.TheDef
);
475 llvm::sort(FeatureBitsets
, [&](const std::vector
<Record
*> &A
,
476 const std::vector
<Record
*> &B
) {
477 if (A
.size() < B
.size())
479 if (A
.size() > B
.size())
481 for (const auto &Pair
: zip(A
, B
)) {
482 if (std::get
<0>(Pair
)->getName() < std::get
<1>(Pair
)->getName())
484 if (std::get
<0>(Pair
)->getName() > std::get
<1>(Pair
)->getName())
489 FeatureBitsets
.erase(
490 std::unique(FeatureBitsets
.begin(), FeatureBitsets
.end()),
491 FeatureBitsets
.end());
492 o
<< "#ifndef NDEBUG\n"
493 << "// Feature bitsets.\n"
494 << "enum : " << getMinimalTypeForRange(FeatureBitsets
.size()) << " {\n"
496 for (const auto &FeatureBitset
: FeatureBitsets
) {
497 if (FeatureBitset
.empty())
499 o
<< " " << getNameForFeatureBitset(FeatureBitset
) << ",\n";
502 << "static constexpr FeatureBitset FeatureBitsets[] = {\n"
503 << " {}, // CEFBS_None\n";
504 for (const auto &FeatureBitset
: FeatureBitsets
) {
505 if (FeatureBitset
.empty())
508 for (const auto &Feature
: FeatureBitset
) {
509 const auto &I
= SubtargetFeatures
.find(Feature
);
510 assert(I
!= SubtargetFeatures
.end() && "Didn't import predicate?");
511 o
<< I
->second
.getEnumBitName() << ", ";
516 << "#endif // NDEBUG\n\n";
519 // Emit the predicate verifier.
520 o
<< "void " << Target
.getName()
521 << "MCCodeEmitter::verifyInstructionPredicates(\n"
522 << " const MCInst &Inst, const FeatureBitset &AvailableFeatures) const {\n"
523 << "#ifndef NDEBUG\n"
524 << " static " << getMinimalTypeForRange(FeatureBitsets
.size())
525 << " RequiredFeaturesRefs[] = {\n";
526 unsigned InstIdx
= 0;
527 for (const CodeGenInstruction
*Inst
: Target
.getInstructionsByEnumValue()) {
529 unsigned NumPredicates
= 0;
530 for (Record
*Predicate
: Inst
->TheDef
->getValueAsListOfDefs("Predicates")) {
531 const auto &I
= SubtargetFeatures
.find(Predicate
);
532 if (I
!= SubtargetFeatures
.end()) {
533 o
<< '_' << I
->second
.TheDef
->getName();
539 o
<< ", // " << Inst
->TheDef
->getName() << " = " << InstIdx
<< "\n";
543 o
<< " assert(Inst.getOpcode() < " << InstIdx
<< ");\n";
544 o
<< " const FeatureBitset &RequiredFeatures = "
545 "FeatureBitsets[RequiredFeaturesRefs[Inst.getOpcode()]];\n";
546 o
<< " FeatureBitset MissingFeatures =\n"
547 << " (AvailableFeatures & RequiredFeatures) ^\n"
548 << " RequiredFeatures;\n"
549 << " if (MissingFeatures.any()) {\n"
550 << " std::ostringstream Msg;\n"
551 << " Msg << \"Attempting to emit \" << "
552 "MCII.getName(Inst.getOpcode()).str()\n"
553 << " << \" instruction but the \";\n"
554 << " for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)\n"
555 << " if (MissingFeatures.test(i))\n"
556 << " Msg << SubtargetFeatureNames[i] << \" \";\n"
557 << " Msg << \"predicate(s) are not met\";\n"
558 << " report_fatal_error(Msg.str());\n"
561 << "// Silence unused variable warning on targets that don't use MCII for "
562 "other purposes (e.g. BPF).\n"
564 << "#endif // NDEBUG\n";
569 } // end anonymous namespace
573 void EmitCodeEmitter(RecordKeeper
&RK
, raw_ostream
&OS
) {
574 emitSourceFileHeader("Machine Code Emitter", OS
);
575 CodeEmitterGen(RK
).run(OS
);
578 } // end namespace llvm