1 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // CodeEmitterGen uses the descriptions of instructions and their fields to
10 // construct an automated code emitter: a function called
11 // getBinaryCodeForInstr() that, given an MCInst, returns the value of the
12 // instruction - either as a uint64_t or as an APInt, depending on the
13 // maximum bit width of all Inst definitions.
15 // In addition, it generates another function called getOperandBitOffset()
16 // that, given an MCInst and an operand index, returns the minimum of indices of
17 // all bits that carry some portion of the respective operand. When the target's
18 // encodeInstruction() stores the instruction in a little-endian byte order, the
19 // returned value is the offset of the start of the operand in the encoded
20 // instruction. Other targets might need to adjust the returned value according
21 // to their encodeInstruction() implementation.
23 //===----------------------------------------------------------------------===//
25 #include "CodeGenHwModes.h"
26 #include "CodeGenInstruction.h"
27 #include "CodeGenTarget.h"
28 #include "InfoByHwMode.h"
29 #include "VarLenCodeEmitterGen.h"
30 #include "llvm/ADT/APInt.h"
31 #include "llvm/ADT/ArrayRef.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/TableGen/Error.h"
36 #include "llvm/TableGen/Record.h"
37 #include "llvm/TableGen/TableGenBackend.h"
49 class CodeEmitterGen
{
50 RecordKeeper
&Records
;
53 CodeEmitterGen(RecordKeeper
&R
) : Records(R
) {}
55 void run(raw_ostream
&o
);
58 int getVariableBit(const std::string
&VarName
, BitsInit
*BI
, int bit
);
59 std::pair
<std::string
, std::string
>
60 getInstructionCases(Record
*R
, CodeGenTarget
&Target
);
61 void addInstructionCasesForEncoding(Record
*R
, Record
*EncodingDef
,
62 CodeGenTarget
&Target
, std::string
&Case
,
63 std::string
&BitOffsetCase
);
64 bool addCodeToMergeInOperand(Record
*R
, BitsInit
*BI
,
65 const std::string
&VarName
, std::string
&Case
,
66 std::string
&BitOffsetCase
,
67 CodeGenTarget
&Target
);
69 void emitInstructionBaseValues(
70 raw_ostream
&o
, ArrayRef
<const CodeGenInstruction
*> NumberedInstructions
,
71 CodeGenTarget
&Target
, int HwMode
= -1);
73 emitCaseMap(raw_ostream
&o
,
74 const std::map
<std::string
, std::vector
<std::string
>> &CaseMap
);
75 unsigned BitWidth
= 0u;
76 bool UseAPInt
= false;
79 // If the VarBitInit at position 'bit' matches the specified variable then
80 // return the variable bit position. Otherwise return -1.
81 int CodeEmitterGen::getVariableBit(const std::string
&VarName
,
82 BitsInit
*BI
, int bit
) {
83 if (VarBitInit
*VBI
= dyn_cast
<VarBitInit
>(BI
->getBit(bit
))) {
84 if (VarInit
*VI
= dyn_cast
<VarInit
>(VBI
->getBitVar()))
85 if (VI
->getName() == VarName
)
86 return VBI
->getBitNum();
87 } else if (VarInit
*VI
= dyn_cast
<VarInit
>(BI
->getBit(bit
))) {
88 if (VI
->getName() == VarName
)
95 // Returns true on success, false if an error occurred.
96 bool CodeEmitterGen::addCodeToMergeInOperand(Record
*R
, BitsInit
*BI
,
97 const std::string
&VarName
,
99 std::string
&BitOffsetCase
,
100 CodeGenTarget
&Target
) {
101 CodeGenInstruction
&CGI
= Target
.getInstruction(R
);
103 // Determine if VarName actually contributes to the Inst encoding.
104 int bit
= BI
->getNumBits()-1;
106 // Scan for a bit that this contributed to.
108 if (getVariableBit(VarName
, BI
, bit
) != -1)
114 // If we found no bits, ignore this value; otherwise emit the call to get the
115 // operand encoding.
119 // If the operand matches by name, reference according to that
120 // operand number. Non-matching operands are assumed to be in
123 std::pair
<unsigned, unsigned> SubOp
;
124 if (CGI
.Operands
.hasSubOperandAlias(VarName
, SubOp
)) {
125 OpIdx
= CGI
.Operands
[SubOp
.first
].MIOperandNo
+ SubOp
.second
;
126 } else if (CGI
.Operands
.hasOperandNamed(VarName
, OpIdx
)) {
127 // Get the machine operand number for the indicated operand.
128 OpIdx
= CGI
.Operands
[OpIdx
].MIOperandNo
;
130 PrintError(R
, Twine("No operand named ") + VarName
+ " in record " + R
->getName());
134 if (CGI
.Operands
.isFlatOperandNotEmitted(OpIdx
)) {
135 PrintError(R
, "Operand " + VarName
+ " used but also marked as not emitted!");
139 std::pair
<unsigned, unsigned> SO
= CGI
.Operands
.getSubOperandNumber(OpIdx
);
140 std::string
&EncoderMethodName
=
141 CGI
.Operands
[SO
.first
].EncoderMethodNames
[SO
.second
];
144 Case
+= " op.clearAllBits();\n";
146 Case
+= " // op: " + VarName
+ "\n";
148 // If the source operand has a custom encoder, use it.
149 if (!EncoderMethodName
.empty()) {
151 Case
+= " " + EncoderMethodName
+ "(MI, " + utostr(OpIdx
);
154 Case
+= " op = " + EncoderMethodName
+ "(MI, " + utostr(OpIdx
);
156 Case
+= ", Fixups, STI);\n";
159 Case
+= " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx
) + ")";
160 Case
+= ", op, Fixups, STI";
162 Case
+= " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx
) + ")";
163 Case
+= ", Fixups, STI";
168 // Precalculate the number of lits this variable contributes to in the
169 // operand. If there is a single lit (consecutive range of bits) we can use a
170 // destructive sequence on APInt that reduces memory allocations.
171 int numOperandLits
= 0;
172 for (int tmpBit
= bit
; tmpBit
>= 0;) {
173 int varBit
= getVariableBit(VarName
, BI
, tmpBit
);
175 // If this bit isn't from a variable, skip it.
181 // Figure out the consecutive range of bits covered by this operand, in
182 // order to generate better encoding code.
183 int beginVarBit
= varBit
;
185 for (--tmpBit
; tmpBit
>= 0;) {
186 varBit
= getVariableBit(VarName
, BI
, tmpBit
);
187 if (varBit
== -1 || varBit
!= (beginVarBit
- N
))
195 unsigned BitOffset
= -1;
197 int varBit
= getVariableBit(VarName
, BI
, bit
);
199 // If this bit isn't from a variable, skip it.
205 // Figure out the consecutive range of bits covered by this operand, in
206 // order to generate better encoding code.
207 int beginInstBit
= bit
;
208 int beginVarBit
= varBit
;
210 for (--bit
; bit
>= 0;) {
211 varBit
= getVariableBit(VarName
, BI
, bit
);
212 if (varBit
== -1 || varBit
!= (beginVarBit
- N
)) break;
220 unsigned loBit
= beginVarBit
- N
+ 1;
221 unsigned hiBit
= loBit
+ N
;
222 unsigned loInstBit
= beginInstBit
- N
+ 1;
223 BitOffset
= loInstBit
;
225 std::string extractStr
;
227 extractStr
= "op.extractBits(" + itostr(hiBit
- loBit
) + ", " +
229 Case
+= " Value.insertBits(" + extractStr
+ ", " +
230 itostr(loInstBit
) + ");\n";
232 extractStr
= "op.extractBitsAsZExtValue(" + itostr(hiBit
- loBit
) +
233 ", " + itostr(loBit
) + ")";
234 Case
+= " Value.insertBits(" + extractStr
+ ", " +
235 itostr(loInstBit
) + ", " + itostr(hiBit
- loBit
) + ");\n";
238 uint64_t opMask
= ~(uint64_t)0 >> (64 - N
);
239 opShift
= beginVarBit
- N
+ 1;
241 maskStr
= "UINT64_C(" + utostr(opMask
) + ")";
242 opShift
= beginInstBit
- beginVarBit
;
244 if (numOperandLits
== 1) {
245 Case
+= " op &= " + maskStr
+ ";\n";
247 Case
+= " op <<= " + itostr(opShift
) + ";\n";
248 } else if (opShift
< 0) {
249 Case
+= " op >>= " + itostr(-opShift
) + ";\n";
251 Case
+= " Value |= op;\n";
254 Case
+= " Value |= (op & " + maskStr
+ ") << " +
255 itostr(opShift
) + ";\n";
256 } else if (opShift
< 0) {
257 Case
+= " Value |= (op & " + maskStr
+ ") >> " +
258 itostr(-opShift
) + ";\n";
260 Case
+= " Value |= (op & " + maskStr
+ ");\n";
266 if (BitOffset
!= (unsigned)-1) {
267 BitOffsetCase
+= " case " + utostr(OpIdx
) + ":\n";
268 BitOffsetCase
+= " // op: " + VarName
+ "\n";
269 BitOffsetCase
+= " return " + utostr(BitOffset
) + ";\n";
275 std::pair
<std::string
, std::string
>
276 CodeEmitterGen::getInstructionCases(Record
*R
, CodeGenTarget
&Target
) {
277 std::string Case
, BitOffsetCase
;
279 auto append
= [&](const char *S
) {
284 if (const RecordVal
*RV
= R
->getValue("EncodingInfos")) {
285 if (auto *DI
= dyn_cast_or_null
<DefInit
>(RV
->getValue())) {
286 const CodeGenHwModes
&HWM
= Target
.getHwModes();
287 EncodingInfoByHwMode
EBM(DI
->getDef(), HWM
);
288 append(" switch (HwMode) {\n");
289 append(" default: llvm_unreachable(\"Unhandled HwMode\");\n");
290 for (auto &KV
: EBM
) {
291 append((" case " + itostr(KV
.first
) + ": {\n").c_str());
292 addInstructionCasesForEncoding(R
, KV
.second
, Target
, Case
,
298 return std::make_pair(std::move(Case
), std::move(BitOffsetCase
));
301 addInstructionCasesForEncoding(R
, R
, Target
, Case
, BitOffsetCase
);
302 return std::make_pair(std::move(Case
), std::move(BitOffsetCase
));
305 void CodeEmitterGen::addInstructionCasesForEncoding(
306 Record
*R
, Record
*EncodingDef
, CodeGenTarget
&Target
, std::string
&Case
,
307 std::string
&BitOffsetCase
) {
308 BitsInit
*BI
= EncodingDef
->getValueAsBitsInit("Inst");
310 // Loop over all of the fields in the instruction, determining which are the
311 // operands to the instruction.
313 size_t OrigBitOffsetCaseSize
= BitOffsetCase
.size();
314 BitOffsetCase
+= " switch (OpNum) {\n";
315 size_t BitOffsetCaseSizeBeforeLoop
= BitOffsetCase
.size();
316 for (const RecordVal
&RV
: EncodingDef
->getValues()) {
317 // Ignore fixed fields in the record, we're looking for values like:
318 // bits<5> RST = { ?, ?, ?, ?, ? };
319 if (RV
.isNonconcreteOK() || RV
.getValue()->isComplete())
322 Success
&= addCodeToMergeInOperand(R
, BI
, std::string(RV
.getName()), Case
,
323 BitOffsetCase
, Target
);
325 // Avoid empty switches.
326 if (BitOffsetCase
.size() == BitOffsetCaseSizeBeforeLoop
)
327 BitOffsetCase
.resize(OrigBitOffsetCaseSize
);
329 BitOffsetCase
+= " }\n";
332 // Dump the record, so we can see what's going on...
334 raw_string_ostream
S(E
);
335 S
<< "Dumping record for previous error:\n";
340 StringRef PostEmitter
= R
->getValueAsString("PostEncoderMethod");
341 if (!PostEmitter
.empty()) {
344 Case
+= "(MI, Value";
350 static void emitInstBits(raw_ostream
&OS
, const APInt
&Bits
) {
351 for (unsigned I
= 0; I
< Bits
.getNumWords(); ++I
)
352 OS
<< ((I
> 0) ? ", " : "") << "UINT64_C(" << utostr(Bits
.getRawData()[I
])
356 void CodeEmitterGen::emitInstructionBaseValues(
357 raw_ostream
&o
, ArrayRef
<const CodeGenInstruction
*> NumberedInstructions
,
358 CodeGenTarget
&Target
, int HwMode
) {
359 const CodeGenHwModes
&HWM
= Target
.getHwModes();
361 o
<< " static const uint64_t InstBits[] = {\n";
363 o
<< " static const uint64_t InstBits_" << HWM
.getMode(HwMode
).Name
366 for (const CodeGenInstruction
*CGI
: NumberedInstructions
) {
367 Record
*R
= CGI
->TheDef
;
369 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
370 R
->getValueAsBit("isPseudo")) {
371 o
<< " "; emitInstBits(o
, APInt(BitWidth
, 0)); o
<< ",\n";
375 Record
*EncodingDef
= R
;
376 if (const RecordVal
*RV
= R
->getValue("EncodingInfos")) {
377 if (auto *DI
= dyn_cast_or_null
<DefInit
>(RV
->getValue())) {
378 EncodingInfoByHwMode
EBM(DI
->getDef(), HWM
);
379 if (EBM
.hasMode(HwMode
))
380 EncodingDef
= EBM
.get(HwMode
);
383 BitsInit
*BI
= EncodingDef
->getValueAsBitsInit("Inst");
385 // Start by filling in fixed values.
386 APInt
Value(BitWidth
, 0);
387 for (unsigned i
= 0, e
= BI
->getNumBits(); i
!= e
; ++i
) {
388 if (auto *B
= dyn_cast
<BitInit
>(BI
->getBit(i
)); B
&& B
->getValue())
392 emitInstBits(o
, Value
);
393 o
<< "," << '\t' << "// " << R
->getName() << "\n";
395 o
<< " UINT64_C(0)\n };\n";
398 void CodeEmitterGen::emitCaseMap(
400 const std::map
<std::string
, std::vector
<std::string
>> &CaseMap
) {
401 std::map
<std::string
, std::vector
<std::string
>>::const_iterator IE
, EE
;
402 for (IE
= CaseMap
.begin(), EE
= CaseMap
.end(); IE
!= EE
; ++IE
) {
403 const std::string
&Case
= IE
->first
;
404 const std::vector
<std::string
> &InstList
= IE
->second
;
406 for (int i
= 0, N
= InstList
.size(); i
< N
; i
++) {
409 o
<< " case " << InstList
[i
] << ":";
418 void CodeEmitterGen::run(raw_ostream
&o
) {
419 emitSourceFileHeader("Machine Code Emitter", o
);
421 CodeGenTarget
Target(Records
);
422 std::vector
<Record
*> Insts
= Records
.getAllDerivedDefinitions("Instruction");
424 // For little-endian instruction bit encodings, reverse the bit order
425 Target
.reverseBitsForLittleEndianEncoding();
427 ArrayRef
<const CodeGenInstruction
*> NumberedInstructions
=
428 Target
.getInstructionsByEnumValue();
430 if (any_of(NumberedInstructions
, [](const CodeGenInstruction
*CGI
) {
431 Record
*R
= CGI
->TheDef
;
432 return R
->getValue("Inst") && isa
<DagInit
>(R
->getValueInit("Inst"));
434 emitVarLenCodeEmitter(Records
, o
);
436 const CodeGenHwModes
&HWM
= Target
.getHwModes();
437 // The set of HwModes used by instruction encodings.
438 std::set
<unsigned> HwModes
;
440 for (const CodeGenInstruction
*CGI
: NumberedInstructions
) {
441 Record
*R
= CGI
->TheDef
;
442 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
443 R
->getValueAsBit("isPseudo"))
446 if (const RecordVal
*RV
= R
->getValue("EncodingInfos")) {
447 if (DefInit
*DI
= dyn_cast_or_null
<DefInit
>(RV
->getValue())) {
448 EncodingInfoByHwMode
EBM(DI
->getDef(), HWM
);
449 for (auto &KV
: EBM
) {
450 BitsInit
*BI
= KV
.second
->getValueAsBitsInit("Inst");
451 BitWidth
= std::max(BitWidth
, BI
->getNumBits());
452 HwModes
.insert(KV
.first
);
457 BitsInit
*BI
= R
->getValueAsBitsInit("Inst");
458 BitWidth
= std::max(BitWidth
, BI
->getNumBits());
460 UseAPInt
= BitWidth
> 64;
462 // Emit function declaration
464 o
<< "void " << Target
.getName()
465 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
466 << " SmallVectorImpl<MCFixup> &Fixups,\n"
468 << " APInt &Scratch,\n"
469 << " const MCSubtargetInfo &STI) const {\n";
471 o
<< "uint64_t " << Target
.getName();
472 o
<< "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
473 << " SmallVectorImpl<MCFixup> &Fixups,\n"
474 << " const MCSubtargetInfo &STI) const {\n";
477 // Emit instruction base values
478 if (HwModes
.empty()) {
479 emitInstructionBaseValues(o
, NumberedInstructions
, Target
, -1);
481 for (unsigned HwMode
: HwModes
)
482 emitInstructionBaseValues(o
, NumberedInstructions
, Target
, (int)HwMode
);
485 if (!HwModes
.empty()) {
486 o
<< " const uint64_t *InstBits;\n";
487 o
<< " unsigned HwMode = STI.getHwMode();\n";
488 o
<< " switch (HwMode) {\n";
489 o
<< " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
490 for (unsigned I
: HwModes
) {
491 o
<< " case " << I
<< ": InstBits = InstBits_" << HWM
.getMode(I
).Name
497 // Map to accumulate all the cases.
498 std::map
<std::string
, std::vector
<std::string
>> CaseMap
;
499 std::map
<std::string
, std::vector
<std::string
>> BitOffsetCaseMap
;
501 // Construct the case statements for each opcode
502 for (Record
*R
: Insts
) {
503 if (R
->getValueAsString("Namespace") == "TargetOpcode" ||
504 R
->getValueAsBit("isPseudo"))
506 std::string InstName
=
507 (R
->getValueAsString("Namespace") + "::" + R
->getName()).str();
508 std::string Case
, BitOffsetCase
;
509 std::tie(Case
, BitOffsetCase
) = getInstructionCases(R
, Target
);
511 CaseMap
[Case
].push_back(InstName
);
512 BitOffsetCaseMap
[BitOffsetCase
].push_back(std::move(InstName
));
515 // Emit initial function code
517 int NumWords
= APInt::getNumWords(BitWidth
);
518 o
<< " const unsigned opcode = MI.getOpcode();\n"
519 << " if (Scratch.getBitWidth() != " << BitWidth
<< ")\n"
520 << " Scratch = Scratch.zext(" << BitWidth
<< ");\n"
521 << " Inst = APInt(" << BitWidth
<< ", ArrayRef(InstBits + opcode * "
522 << NumWords
<< ", " << NumWords
<< "));\n"
523 << " APInt &Value = Inst;\n"
524 << " APInt &op = Scratch;\n"
525 << " switch (opcode) {\n";
527 o
<< " const unsigned opcode = MI.getOpcode();\n"
528 << " uint64_t Value = InstBits[opcode];\n"
529 << " uint64_t op = 0;\n"
530 << " (void)op; // suppress warning\n"
531 << " switch (opcode) {\n";
534 // Emit each case statement
535 emitCaseMap(o
, CaseMap
);
537 // Default case: unhandled opcode
539 << " std::string msg;\n"
540 << " raw_string_ostream Msg(msg);\n"
541 << " Msg << \"Not supported instr: \" << MI;\n"
542 << " report_fatal_error(Msg.str().c_str());\n"
545 o
<< " Inst = Value;\n";
547 o
<< " return Value;\n";
550 o
<< "#ifdef GET_OPERAND_BIT_OFFSET\n"
551 << "#undef GET_OPERAND_BIT_OFFSET\n\n"
552 << "uint32_t " << Target
.getName()
553 << "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"
554 << " unsigned OpNum,\n"
555 << " const MCSubtargetInfo &STI) const {\n"
556 << " switch (MI.getOpcode()) {\n";
557 emitCaseMap(o
, BitOffsetCaseMap
);
559 << " std::string msg;\n"
560 << " raw_string_ostream Msg(msg);\n"
561 << " Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum "
563 << " report_fatal_error(Msg.str().c_str());\n"
565 << "#endif // GET_OPERAND_BIT_OFFSET\n\n";
569 } // end anonymous namespace
571 static TableGen::Emitter::OptClass
<CodeEmitterGen
>
572 X("gen-emitter", "Generate machine code emitter");