1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // CodeEmitterGen uses the descriptions of instructions and their fields to
10 // construct an automated code emitter: a function called
11 // getBinaryCodeForInstr() that, given a MCInst, returns the value of the
12 // instruction - either as a uint64_t or as an APInt, depending on the
13 // maximum bit width of all Inst definitions.
15 // In addition, it generates another function called getOperandBitOffset()
16 // that, given a MCInst and an operand index, returns the minimum of indices of
17 // all bits that carry some portion of the respective operand. When the target's
18 // encodeInstruction() stores the instruction in a little-endian byte order, the
19 // returned value is the offset of the start of the operand in the encoded
20 // instruction. Other targets might need to adjust the returned value according
21 // to their encodeInstruction() implementation.
23 //===----------------------------------------------------------------------===//
25 #include "Common/CodeGenHwModes.h"
26 #include "Common/CodeGenInstruction.h"
27 #include "Common/CodeGenTarget.h"
28 #include "Common/InfoByHwMode.h"
29 #include "Common/VarLenCodeEmitterGen.h"
30 #include "llvm/ADT/APInt.h"
31 #include "llvm/ADT/ArrayRef.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/TableGen/Error.h"
36 #include "llvm/TableGen/Record.h"
37 #include "llvm/TableGen/TableGenBackend.h"
// TableGen backend class that generates the fixed-length machine code
// emitter. run() is the entry point; the remaining members are helpers that
// build the per-instruction code fragments and the base-value tables.
// NOTE(review): access specifiers and the closing brace of the class are not
// visible in this view.
49 class CodeEmitterGen
{
// Record database that all instruction definitions are read from.
50 const RecordKeeper
&Records
;
// Binds the generator to the record keeper R.
53 CodeEmitterGen(const RecordKeeper
&R
) : Records(R
) {}
// Entry point: writes the generated emitter source to O.
55 void run(raw_ostream
&O
);
// Returns the bit position within variable VarName that feeds bit Bit of BI,
// or -1 when that bit does not come from VarName.
58 int getVariableBit(const std::string
&VarName
, const BitsInit
*BI
, int Bit
);
// Builds the (Case, BitOffsetCase) code fragments for instruction record R.
59 std::pair
<std::string
, std::string
>
60 getInstructionCases(const Record
*R
, const CodeGenTarget
&Target
);
// Appends the encoding code for one encoding definition (EncodingDef) of
// instruction R to the output strings.
61 void addInstructionCasesForEncoding(const Record
*R
,
62 const Record
*EncodingDef
,
63 const CodeGenTarget
&Target
,
65 std::string
&BitOffsetCase
);
// Appends the code that merges operand VarName into the encoded value.
// Returns true on success and false if an error was reported.
66 bool addCodeToMergeInOperand(const Record
*R
, const BitsInit
*BI
,
67 const std::string
&VarName
, std::string
&Case
,
68 std::string
&BitOffsetCase
,
69 const CodeGenTarget
&Target
);
// Emits the table of fixed instruction bits for one hardware mode; HwMode
// defaults to DefaultMode.
71 void emitInstructionBaseValues(
72 raw_ostream
&O
, ArrayRef
<const CodeGenInstruction
*> NumberedInstructions
,
73 const CodeGenTarget
&Target
, unsigned HwMode
= DefaultMode
);
// Emits the case labels for every entry of CaseMap, which maps a generated
// code fragment to the list of instruction names that share it.
75 emitCaseMap(raw_ostream
&O
,
76 const std::map
<std::string
, std::vector
<std::string
>> &CaseMap
);
// Widest Inst field seen so far, in bits; run() raises it with std::max over
// the encodings it scans.
77 unsigned BitWidth
= 0u;
// Set by run() to BitWidth > 64, i.e. the encoding no longer fits a uint64_t.
78 bool UseAPInt
= false;
// Maps one bit of an encoding back to the variable bit that supplies it.
//   VarName - name of the variable (operand or field) to look for.
//   BI      - the bits initializer being inspected.
//   Bit     - index into BI of the bit to test.
81 // If the VarBitInit at position 'bit' matches the specified variable then
82 // return the variable bit position. Otherwise return -1.
83 int CodeEmitterGen::getVariableBit(const std::string
&VarName
,
84 const BitsInit
*BI
, int Bit
) {
// First form: the bit refers to a single bit of a variable (VarBitInit). When
// the underlying variable is a VarInit named VarName, its bit number is the
// result.
85 if (const VarBitInit
*VBI
= dyn_cast
<VarBitInit
>(BI
->getBit(Bit
))) {
86 if (const VarInit
*VI
= dyn_cast
<VarInit
>(VBI
->getBitVar()))
87 if (VI
->getName() == VarName
)
88 return VBI
->getBitNum();
// Second form: the bit is the whole variable itself (a VarInit).
// NOTE(review): the value returned on a match here and the final fall-through
// return are not visible in this view - confirm against the full file.
89 } else if (const VarInit
*VI
= dyn_cast
<VarInit
>(BI
->getBit(Bit
))) {
90 if (VI
->getName() == VarName
)
// Appends to Case the generated code that fetches operand VarName of
// instruction R and merges its bits into the encoded value, and appends to
// BitOffsetCase a case that returns the operand's lowest instruction bit.
//   R             - instruction record, used for operand lookup and errors.
//   BI            - the Inst bits of the encoding being processed.
//   VarName       - name of the field/operand to merge.
//   Case          - accumulated encoding code (appended to).
//   BitOffsetCase - accumulated bit-offset code (appended to).
//   Target        - supplies the CodeGenInstruction for R.
// NOTE(review): several lines of this function, including some parameters and
// branch headers, are not visible in this view.
97 // Returns true if it succeeds, false if an error.
98 bool CodeEmitterGen::addCodeToMergeInOperand(const Record
*R
,
100 const std::string
&VarName
,
102 std::string
&BitOffsetCase
,
103 const CodeGenTarget
&Target
) {
104 CodeGenInstruction
&CGI
= Target
.getInstruction(R
);
// Scanning starts at the most significant bit of the encoding.
106 // Determine if VarName actually contributes to the Inst encoding.
107 int Bit
= BI
->getNumBits() - 1;
109 // Scan for a bit that this contributed to.
111 if (getVariableBit(VarName
, BI
, Bit
) != -1)
117 // If we found no bits, ignore this value, otherwise emit the call to get the
122 // If the operand matches by name, reference according to that
123 // operand number. Non-matching operands are assumed to be in
// Resolve VarName to a flat machine-operand index: a sub-operand alias is
// tried first, then a plain operand name; the remaining visible path reports
// an error.
126 std::pair
<unsigned, unsigned> SubOp
;
127 if (CGI
.Operands
.hasSubOperandAlias(VarName
, SubOp
)) {
128 OpIdx
= CGI
.Operands
[SubOp
.first
].MIOperandNo
+ SubOp
.second
;
129 } else if (CGI
.Operands
.hasOperandNamed(VarName
, OpIdx
)) {
130 // Get the machine operand number for the indicated operand.
131 OpIdx
= CGI
.Operands
[OpIdx
].MIOperandNo
;
133 PrintError(R
, Twine("No operand named ") + VarName
+ " in record " +
// An operand that is flagged as not emitted must not appear in the encoding.
138 if (CGI
.Operands
.isFlatOperandNotEmitted(OpIdx
)) {
140 "Operand " + VarName
+ " used but also marked as not emitted!");
// Look up the custom encoder method (if any) registered for this operand or
// sub-operand.
144 std::pair
<unsigned, unsigned> SO
= CGI
.Operands
.getSubOperandNumber(OpIdx
);
145 std::string
&EncoderMethodName
=
146 CGI
.Operands
[SO
.first
].EncoderMethodNames
[SO
.second
];
// NOTE(review): the condition guarding the clearAllBits text is not visible;
// presumably it applies only to the APInt form - confirm.
149 Case
+= " op.clearAllBits();\n";
151 Case
+= " // op: " + VarName
+ "\n";
// Two call shapes are emitted below: one passes op as an out-argument, the
// other assigns the call result to op. The same holds for the
// getMachineOpValue fallback.
153 // If the source operand has a custom encoder, use it.
154 if (!EncoderMethodName
.empty()) {
156 Case
+= " " + EncoderMethodName
+ "(MI, " + utostr(OpIdx
);
159 Case
+= " op = " + EncoderMethodName
+ "(MI, " + utostr(OpIdx
);
161 Case
+= ", Fixups, STI);\n";
165 " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx
) + ")";
166 Case
+= ", op, Fixups, STI";
168 Case
+= " op = getMachineOpValue(MI, MI.getOperand(" +
170 Case
+= ", Fixups, STI";
// First pass over the encoding: count the consecutive runs (lits) of bits
// that come from VarName. NumOperandLits == 1 later selects the in-place
// sequence that modifies op directly.
175 // Precalculate the number of lits this variable contributes to in the
176 // operand. If there is a single lit (consecutive range of bits) we can use a
177 // destructive sequence on APInt that reduces memory allocations.
178 int NumOperandLits
= 0;
179 for (int TmpBit
= Bit
; TmpBit
>= 0;) {
180 int VarBit
= getVariableBit(VarName
, BI
, TmpBit
);
182 // If this bit isn't from a variable, skip it.
188 // Figure out the consecutive range of bits covered by this operand, in
189 // order to generate better encoding code.
190 int BeginVarBit
= VarBit
;
// Extend the run while each lower instruction bit maps to the next lower
// variable bit.
192 for (--TmpBit
; TmpBit
>= 0;) {
193 VarBit
= getVariableBit(VarName
, BI
, TmpBit
);
194 if (VarBit
== -1 || VarBit
!= (BeginVarBit
- N
))
// Second pass: emit the merge code for each run. BitOffset starts at the
// unsigned value of -1, which the check at the end treats as meaning that no
// bits were found.
202 unsigned BitOffset
= -1;
204 int VarBit
= getVariableBit(VarName
, BI
, Bit
);
206 // If this bit isn't from a variable, skip it.
212 // Figure out the consecutive range of bits covered by this operand, in
213 // order to generate better encoding code.
214 int BeginInstBit
= Bit
;
215 int BeginVarBit
= VarBit
;
217 for (--Bit
; Bit
>= 0;) {
218 VarBit
= getVariableBit(VarName
, BI
, Bit
);
219 if (VarBit
== -1 || VarBit
!= (BeginVarBit
- N
))
// A run of N bits covers variable bits [LoBit, HiBit) and starts at
// instruction bit LoInstBit. BitOffset is overwritten with LoInstBit for each
// run processed.
228 unsigned LoBit
= BeginVarBit
- N
+ 1;
229 unsigned HiBit
= LoBit
+ N
;
230 unsigned LoInstBit
= BeginInstBit
- N
+ 1;
231 BitOffset
= LoInstBit
;
// Emitted text that uses extractBits/insertBits on op and Value.
// NOTE(review): the conditions choosing between the two extract forms are not
// visible in this view.
233 std::string ExtractStr
;
235 ExtractStr
= "op.extractBits(" + itostr(HiBit
- LoBit
) + ", " +
237 Case
+= " Value.insertBits(" + ExtractStr
+ ", " +
238 itostr(LoInstBit
) + ");\n";
240 ExtractStr
= "op.extractBitsAsZExtValue(" + itostr(HiBit
- LoBit
) +
241 ", " + itostr(LoBit
) + ")";
242 Case
+= " Value.insertBits(" + ExtractStr
+ ", " +
243 itostr(LoInstBit
) + ", " + itostr(HiBit
- LoBit
) + ");\n";
// Mask-and-shift form: OpMask has the low N bits set. OpShift is computed as
// BeginVarBit - N + 1, and then as BeginInstBit - BeginVarBit. A positive
// OpShift emits a left shift and a negative one a right shift.
246 uint64_t OpMask
= ~(uint64_t)0 >> (64 - N
);
247 OpShift
= BeginVarBit
- N
+ 1;
249 MaskStr
= "UINT64_C(" + utostr(OpMask
) + ")";
250 OpShift
= BeginInstBit
- BeginVarBit
;
// Single run: op is masked and shifted in place and then OR-ed into Value.
252 if (NumOperandLits
== 1) {
253 Case
+= " op &= " + MaskStr
+ ";\n";
255 Case
+= " op <<= " + itostr(OpShift
) + ";\n";
256 } else if (OpShift
< 0) {
257 Case
+= " op >>= " + itostr(-OpShift
) + ";\n";
259 Case
+= " Value |= op;\n";
// Otherwise op is left untouched and a masked, shifted copy is OR-ed in.
262 Case
+= " Value |= (op & " + MaskStr
+ ") << " +
263 itostr(OpShift
) + ";\n";
264 } else if (OpShift
< 0) {
265 Case
+= " Value |= (op & " + MaskStr
+ ") >> " +
266 itostr(-OpShift
) + ";\n";
268 Case
+= " Value |= (op & " + MaskStr
+ ");\n";
// Emit the bit-offset case only when at least one run was found.
274 if (BitOffset
!= (unsigned)-1) {
275 BitOffsetCase
+= " case " + utostr(OpIdx
) + ":\n";
276 BitOffsetCase
+= " // op: " + VarName
+ "\n";
277 BitOffsetCase
+= " return " + utostr(BitOffset
) + ";\n";
// Builds the generated code for one instruction record R and returns it as a
// pair: first the encoding code (Case), then the operand bit-offset code
// (BitOffsetCase). When R has an EncodingInfos field holding a def, the
// encoding depends on the hardware mode and a switch over HwMode is emitted;
// otherwise the encoding is taken from R itself.
283 std::pair
<std::string
, std::string
>
284 CodeEmitterGen::getInstructionCases(const Record
*R
,
285 const CodeGenTarget
&Target
) {
286 std::string Case
, BitOffsetCase
;
// NOTE(review): the lambda body is not visible in this view; presumably it
// appends S to both output strings - confirm.
288 auto Append
= [&](const std::string
&S
) {
293 if (const RecordVal
*RV
= R
->getValue("EncodingInfos")) {
294 if (const auto *DI
= dyn_cast_or_null
<DefInit
>(RV
->getValue())) {
295 const CodeGenHwModes
&HWM
= Target
.getHwModes();
296 EncodingInfoByHwMode
EBM(DI
->getDef(), HWM
);
298 // Invoke the interface to obtain the HwMode ID controlling the
299 // EncodingInfo for the current subtarget. This interface will
300 // mask off irrelevant HwMode IDs.
301 Append(" unsigned HwMode = "
302 "STI.getHwMode(MCSubtargetInfo::HwMode_EncodingInfo);\n");
// First emitted switch: select the base-value table for the active mode. The
// default mode uses InstBits and every other mode uses InstBits_ followed by
// the mode name.
303 Case
+= " switch (HwMode) {\n";
304 Case
+= " default: llvm_unreachable(\"Unknown hardware mode!\"); "
306 for (auto &[ModeId
, Encoding
] : EBM
) {
307 if (ModeId
== DefaultMode
) {
309 " case " + itostr(DefaultMode
) + ": InstBitsByHw = InstBits";
311 Case
+= " case " + itostr(ModeId
) +
312 ": InstBitsByHw = InstBits_" +
313 std::string(HWM
.getMode(ModeId
).Name
);
315 Case
+= "; break;\n";
// Reload the base value from the selected table. Two forms are emitted: an
// APInt built from NumWords 64-bit words, and a direct index by opcode.
319 // We need to remodify the 'Inst' value from the table we found above.
321 int NumWords
= APInt::getNumWords(BitWidth
);
322 Case
+= " Inst = APInt(" + itostr(BitWidth
);
323 Case
+= ", ArrayRef(InstBitsByHw + opcode * " + itostr(NumWords
) +
324 ", " + itostr(NumWords
);
326 Case
+= " Value = Inst;\n";
328 Case
+= " Value = InstBitsByHw[opcode];\n";
// Second emitted switch: the operand-merging code, one case per mode.
331 Append(" switch (HwMode) {\n");
332 Append(" default: llvm_unreachable(\"Unhandled HwMode\");\n");
333 for (auto &[ModeId
, Encoding
] : EBM
) {
334 Append(" case " + itostr(ModeId
) + ": {\n");
335 addInstructionCasesForEncoding(R
, Encoding
, Target
, Case
,
341 return std::pair(std::move(Case
), std::move(BitOffsetCase
));
// No usable EncodingInfos: R serves as its own encoding definition.
344 addInstructionCasesForEncoding(R
, R
, Target
, Case
, BitOffsetCase
);
345 return std::pair(std::move(Case
), std::move(BitOffsetCase
));
// Appends to Case and BitOffsetCase the code for a single encoding of
// instruction R.
//   R             - the instruction record.
//   EncodingDef   - the record whose Inst field and values describe the
//                   encoding; callers pass either R itself or a per-HwMode
//                   encoding record.
//   Target        - forwarded to addCodeToMergeInOperand.
//   Case          - accumulated encoding code (appended to).
//   BitOffsetCase - accumulated bit-offset code (appended to).
348 void CodeEmitterGen::addInstructionCasesForEncoding(
349 const Record
*R
, const Record
*EncodingDef
, const CodeGenTarget
&Target
,
350 std::string
&Case
, std::string
&BitOffsetCase
) {
351 const BitsInit
*BI
= EncodingDef
->getValueAsBitsInit("Inst");
353 // Loop over all of the fields in the instruction, determining which are the
354 // operands to the instruction.
// Record the string sizes before and after the switch header so that an
// empty switch can be removed again below.
356 size_t OrigBitOffsetCaseSize
= BitOffsetCase
.size();
357 BitOffsetCase
+= " switch (OpNum) {\n";
358 size_t BitOffsetCaseSizeBeforeLoop
= BitOffsetCase
.size();
359 for (const RecordVal
&RV
: EncodingDef
->getValues()) {
360 // Ignore fixed fields in the record, we're looking for values like:
361 // bits<5> RST = { ?, ?, ?, ?, ? };
362 if (RV
.isNonconcreteOK() || RV
.getValue()->isComplete())
// Success accumulates the result across all fields; one failure clears it.
365 Success
&= addCodeToMergeInOperand(R
, BI
, std::string(RV
.getName()), Case
,
366 BitOffsetCase
, Target
);
// If the loop added no cases, drop the switch header that was just appended.
368 // Avoid empty switches.
369 if (BitOffsetCase
.size() == BitOffsetCaseSizeBeforeLoop
)
370 BitOffsetCase
.resize(OrigBitOffsetCaseSize
);
372 BitOffsetCase
+= " }\n";
// NOTE(review): the condition guarding this dump and what is done with the
// stream afterwards are not visible in this view.
375 // Dump the record, so we can see what's going on...
377 raw_string_ostream
S(E
);
378 S
<< "Dumping record for previous error:\n";
// When R names a PostEncoderMethod, a call taking (MI, Value, ...) is
// appended to Case.
383 StringRef PostEmitter
= R
->getValueAsString("PostEncoderMethod");
384 if (!PostEmitter
.empty()) {
387 Case
+= "(MI, Value";
// Writes the raw 64-bit words of Bits to OS, one UINT64_C(...) literal per
// word, with a comma and space between consecutive words.
// NOTE(review): the tail of the stream expression is not visible in this
// view.
393 static void emitInstBits(raw_ostream
&OS
, const APInt
&Bits
) {
394 for (unsigned I
= 0; I
< Bits
.getNumWords(); ++I
)
395 OS
<< ((I
> 0) ? ", " : "") << "UINT64_C(" << utostr(Bits
.getRawData()[I
])
// Emits a static uint64_t table holding the fixed (known) encoding bits of
// every instruction, in the order given by NumberedInstructions.
//   O                    - output stream for the generated code.
//   NumberedInstructions - instructions in table order.
//   Target               - supplies the hardware-mode information.
//   HwMode               - mode whose encodings fill the table. DefaultMode
//                          produces the table named InstBits; any other mode
//                          produces InstBits_ followed by the mode name.
399 void CodeEmitterGen::emitInstructionBaseValues(
400 raw_ostream
&O
, ArrayRef
<const CodeGenInstruction
*> NumberedInstructions
,
401 const CodeGenTarget
&Target
, unsigned HwMode
) {
402 const CodeGenHwModes
&HWM
= Target
.getHwModes();
403 if (HwMode
== DefaultMode
)
404 O
<< " static const uint64_t InstBits[] = {\n";
406 O
<< " static const uint64_t InstBits_"
407 << HWM
.getModeName(HwMode
, /*IncludeDefault=*/true) << "[] = {\n";
409 for (const CodeGenInstruction
*CGI
: NumberedInstructions
) {
410 const Record
*R
= CGI
->TheDef
;
// Target-independent opcodes and pseudo instructions get an all-zero entry.
412 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
413 R
->getValueAsBit("isPseudo")) {
415 emitInstBits(O
, APInt(BitWidth
, 0));
// Pick the encoding definition: R itself, or the per-mode record from
// EncodingInfos when one exists for HwMode.
420 const Record
*EncodingDef
= R
;
421 if (const RecordVal
*RV
= R
->getValue("EncodingInfos")) {
422 if (auto *DI
= dyn_cast_or_null
<DefInit
>(RV
->getValue())) {
423 EncodingInfoByHwMode
EBM(DI
->getDef(), HWM
);
424 if (EBM
.hasMode(HwMode
)) {
425 EncodingDef
= EBM
.get(HwMode
);
427 // If the HwMode does not match, then Encoding '0'
428 // should be generated.
429 APInt
Value(BitWidth
, 0);
431 emitInstBits(O
, Value
);
432 O
<< "," << '\t' << "// " << R
->getName() << "\n";
437 const BitsInit
*BI
= EncodingDef
->getValueAsBitsInit("Inst");
// Only bits that are concrete BitInit values equal to true pass the test
// below; variable bits are left for the operand-merging code.
439 // Start by filling in fixed values.
440 APInt
Value(BitWidth
, 0);
441 for (unsigned I
= 0, E
= BI
->getNumBits(); I
!= E
; ++I
) {
442 if (const auto *B
= dyn_cast
<BitInit
>(BI
->getBit(I
)); B
&& B
->getValue())
// Each entry is followed by a trailing comment holding the record name.
446 emitInstBits(O
, Value
);
447 O
<< "," << '\t' << "// " << R
->getName() << "\n";
// A final zero entry closes the table.
449 O
<< " UINT64_C(0)\n };\n";
// Emits the switch cases described by CaseMap. Each map entry pairs a
// generated code fragment (the key) with the instruction names that share it,
// and one case label is written per instruction name.
// NOTE(review): the first parameter and the statements that write the shared
// body are not visible in this view.
452 void CodeEmitterGen::emitCaseMap(
454 const std::map
<std::string
, std::vector
<std::string
>> &CaseMap
) {
455 for (const auto &[Case
, InstList
] : CaseMap
) {
457 for (const auto &Inst
: InstList
) {
460 O
<< " case " << Inst
<< ":";
// Backend entry point: writes the complete generated emitter source to O.
// Steps visible here: emit the file header, build the CodeGenTarget, hand
// variable-length targets to emitVarLenCodeEmitter, otherwise compute the
// maximum encoding width, emit getBinaryCodeForInstr with its base-value
// tables and opcode switch, and finally emit getOperandBitOffset guarded by
// GET_OPERAND_BIT_OFFSET.
// NOTE(review): the exact branch structure (else and continue lines) is not
// visible in this view.
470 void CodeEmitterGen::run(raw_ostream
&O
) {
471 emitSourceFileHeader("Machine Code Emitter", O
);
473 CodeGenTarget
Target(Records
);
475 // For little-endian instruction bit encodings, reverse the bit order
476 Target
.reverseBitsForLittleEndianEncoding();
478 ArrayRef
<const CodeGenInstruction
*> NumberedInstructions
=
479 Target
.getInstructionsByEnumValue();
// Variable-length encodings are handled by a separate generator.
481 if (Target
.hasVariableLengthEncodings()) {
482 emitVarLenCodeEmitter(Records
, O
);
484 const CodeGenHwModes
&HWM
= Target
.getHwModes();
485 // The set of HwModes used by instruction encodings.
486 std::set
<unsigned> HwModes
;
// Scan the instructions to find the widest Inst field, looking at the
// per-mode encodings when EncodingInfos is present. The test below singles
// out TargetOpcode and pseudo instructions.
488 for (const CodeGenInstruction
*CGI
: NumberedInstructions
) {
489 const Record
*R
= CGI
->TheDef
;
490 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
491 R
->getValueAsBit("isPseudo"))
494 if (const RecordVal
*RV
= R
->getValue("EncodingInfos")) {
495 if (const DefInit
*DI
= dyn_cast_or_null
<DefInit
>(RV
->getValue())) {
496 EncodingInfoByHwMode
EBM(DI
->getDef(), HWM
);
497 for (const auto &[Key
, Value
] : EBM
) {
498 const BitsInit
*BI
= Value
->getValueAsBitsInit("Inst");
499 BitWidth
= std::max(BitWidth
, BI
->getNumBits());
505 const BitsInit
*BI
= R
->getValueAsBitsInit("Inst");
506 BitWidth
= std::max(BitWidth
, BI
->getNumBits());
// Encodings wider than 64 bits switch the generated code to the APInt form.
508 UseAPInt
= BitWidth
> 64;
// Two signatures are emitted: a void form with an APInt scratch parameter and
// a form returning uint64_t.
510 // Emit function declaration
512 O
<< "void " << Target
.getName()
513 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
514 << " SmallVectorImpl<MCFixup> &Fixups,\n"
516 << " APInt &Scratch,\n"
517 << " const MCSubtargetInfo &STI) const {\n";
519 O
<< "uint64_t " << Target
.getName();
520 O
<< "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
521 << " SmallVectorImpl<MCFixup> &Fixups,\n"
522 << " const MCSubtargetInfo &STI) const {\n";
// The default-mode table is always emitted; tables for the other modes are
// added when HwModes is not empty.
525 // Emit instruction base values
526 emitInstructionBaseValues(O
, NumberedInstructions
, Target
, DefaultMode
);
527 if (!HwModes
.empty()) {
528 // Emit table for instrs whose encodings are controlled by HwModes.
529 for (unsigned HwMode
: HwModes
) {
530 if (HwMode
== DefaultMode
)
532 emitInstructionBaseValues(O
, NumberedInstructions
, Target
, HwMode
);
535 // This pointer will be assigned to the HwMode table later.
536 O
<< " const uint64_t *InstBitsByHw;\n";
// Both maps are keyed by the generated code text, so instructions with
// identical code end up in the same entry.
539 // Map to accumulate all the cases.
540 std::map
<std::string
, std::vector
<std::string
>> CaseMap
;
541 std::map
<std::string
, std::vector
<std::string
>> BitOffsetCaseMap
;
543 // Construct all cases statement for each opcode
544 for (const Record
*R
: Records
.getAllDerivedDefinitions("Instruction")) {
545 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
546 R
->getValueAsBit("isPseudo"))
// Instruction names are qualified as Namespace::Name.
548 std::string InstName
=
549 (R
->getValueAsString("Namespace") + "::" + R
->getName()).str();
550 std::string Case
, BitOffsetCase
;
551 std::tie(Case
, BitOffsetCase
) = getInstructionCases(R
, Target
);
// InstName is copied into the first map and moved into the second.
553 CaseMap
[Case
].push_back(InstName
);
554 BitOffsetCaseMap
[BitOffsetCase
].push_back(std::move(InstName
));
// Function prologue. The APInt form resizes Scratch and loads Inst from
// NumWords words of the table; the other form reads one table entry.
557 // Emit initial function code
559 int NumWords
= APInt::getNumWords(BitWidth
);
560 O
<< " const unsigned opcode = MI.getOpcode();\n"
561 << " if (Scratch.getBitWidth() != " << BitWidth
<< ")\n"
562 << " Scratch = Scratch.zext(" << BitWidth
<< ");\n"
563 << " Inst = APInt(" << BitWidth
<< ", ArrayRef(InstBits + opcode * "
564 << NumWords
<< ", " << NumWords
<< "));\n"
565 << " APInt &Value = Inst;\n"
566 << " APInt &op = Scratch;\n"
567 << " switch (opcode) {\n";
569 O
<< " const unsigned opcode = MI.getOpcode();\n"
570 << " uint64_t Value = InstBits[opcode];\n"
571 << " uint64_t op = 0;\n"
572 << " (void)op; // suppress warning\n"
573 << " switch (opcode) {\n";
576 // Emit each case statement
577 emitCaseMap(O
, CaseMap
);
// The generated default case formats a message and calls report_fatal_error.
579 // Default case: unhandled opcode
581 << " std::string msg;\n"
582 << " raw_string_ostream Msg(msg);\n"
583 << " Msg << \"Not supported instr: \" << MI;\n"
584 << " report_fatal_error(Msg.str().c_str());\n"
// Epilogue: one form stores Value into Inst and the other returns Value.
587 O
<< " Inst = Value;\n";
589 O
<< " return Value;\n";
// Second generated function, emitted inside a GET_OPERAND_BIT_OFFSET guard:
// getOperandBitOffset, built from BitOffsetCaseMap.
592 O
<< "#ifdef GET_OPERAND_BIT_OFFSET\n"
593 << "#undef GET_OPERAND_BIT_OFFSET\n\n"
594 << "uint32_t " << Target
.getName()
595 << "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"
596 << " unsigned OpNum,\n"
597 << " const MCSubtargetInfo &STI) const {\n"
598 << " switch (MI.getOpcode()) {\n";
599 emitCaseMap(O
, BitOffsetCaseMap
);
601 << " std::string msg;\n"
602 << " raw_string_ostream Msg(msg);\n"
603 << " Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum "
605 << " report_fatal_error(Msg.str().c_str());\n"
607 << "#endif // GET_OPERAND_BIT_OFFSET\n\n";
611 } // end anonymous namespace
// Static registration object that ties the CodeEmitterGen class to the
// gen-emitter option name, with the description string given below.
613 static TableGen::Emitter::OptClass
<CodeEmitterGen
>
614 X("gen-emitter", "Generate machine code emitter");