//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "X86AsmPrinter.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include <string>

using namespace llvm;

namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                               const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace

/// A RAII helper which defines a region of instructions which can't have
/// padding added between them for correctness.
struct NoAutoPaddingScope {
  MCStreamer &OS;
  const bool OldAllowAutoPadding;
  NoAutoPaddingScope(MCStreamer &OS)
      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
  }
};

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);

void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                &MF->getSubtarget<X86Subtarget>());
  }
}
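
// For example (illustrative): if a stackmap requested an 8-byte shadow but the
// instructions that followed it encoded to only 5 bytes before the next
// stackmap or the end of the block, this emits a 3-byte nop so a runtime patch
// of the full shadow cannot overwrite a neighboring record's instructions.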

void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()),
      MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}

/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *
X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    MachineModuleInfoCOFF &MMICOFF =
        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}

MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    RefKind = MCSymbolRefExpr::VK_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    RefKind = MCSymbolRefExpr::VK_SECREL;
    break;
  case X86II::MO_TLSGD:
    RefKind = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86II::MO_TLSLD:
    RefKind = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    RefKind = MCSymbolRefExpr::VK_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    RefKind = MCSymbolRefExpr::VK_GOT;
    break;
  case X86II::MO_GOTOFF:
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
    break;
  case X86II::MO_PLT:
    RefKind = MCSymbolRefExpr::VK_PLT;
    break;
  case X86II::MO_ABS8:
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}
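
// For example (illustrative): a global-address operand tagged MO_GOTPCREL
// lowers to the expression "foo@GOTPCREL", which the assembler turns into a
// RIP-relative GOT reference such as "movq foo@GOTPCREL(%rip), %rax".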

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}

std::optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return std::nullopt;
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return std::nullopt;
  }
}
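
// For example (illustrative): lowering ADD32rr drops its implicit %eflags def,
// and lowering a call drops its register-mask operand, so the resulting MCInst
// carries only the explicit register/immediate/symbol operands the encoder
// needs.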

// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
// information.
static unsigned convertTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    Opcode = X86::JMP32r;
    break;
  case X86::TAILJMPm:
    Opcode = X86::JMP32m;
    break;
  case X86::TAILJMPr64:
    Opcode = X86::JMP64r;
    break;
  case X86::TAILJMPm64:
    Opcode = X86::JMP64m;
    break;
  case X86::TAILJMPr64_REX:
    Opcode = X86::JMP64r_REX;
    break;
  case X86::TAILJMPm64_REX:
    Opcode = X86::JMP64m_REX;
    break;
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    Opcode = X86::JMP_1;
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    Opcode = X86::JCC_1;
    break;
  }

  return Opcode;
}

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(*MaybeMCOp);

  bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;
  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }
  // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    if (In64BitMode)
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;
  case X86::BSF16rm:
  case X86::BSF16rr:
  case X86::BSF32rm:
  case X86::BSF32rr:
  case X86::BSF64rm:
  case X86::BSF64rr: {
    // Add a REP prefix to BSF instructions so that new processors can
    // recognize them as TZCNT, which has better performance than BSF.
    // BSF and TZCNT have different interpretations of the ZF bit, so make
    // sure it won't be used later.
    const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS);
    if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
      OutMI.setFlags(X86::IP_HAS_REPEAT);
    break;
  }
  default:
    break;
  }
}
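
// For reference (illustrative): "rep bsf %eax, %ecx" encodes as F3 0F BC C8,
// the same bytes as "tzcnt %eax, %ecx"; older CPUs ignore the F3 prefix and
// execute BSF, while newer ones execute the faster TZCNT.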

void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
                  MI.getOpcode() != X86::TLS_base_addr32;
  bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
                      MI.getOpcode() == X86::TLS_base_addr64;
  MCContext &Ctx = OutStreamer->getContext();

  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when GOTPCRELX becomes commonplace.
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getAsmInfo()->canRelaxRelocations();

  if (Is64Bits) {
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}
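
// For reference (illustrative): on x86-64 LP64 the general-dynamic path above
// produces the psABI-mandated, padded sequence
//   .byte 0x66
//   leaq  sym@tlsgd(%rip), %rdi
//   .byte 0x66, 0x66
//   rex64 call __tls_get_addr@plt
// whose fixed 16-byte layout is what allows the linker to relax it to the
// initial-exec or local-exec model.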

/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  // Determine the longest nop which can be efficiently decoded for the given
  // target cpu. 15 bytes is the longest single NOP instruction, but some
  // platforms can't decode the longest forms efficiently.
  unsigned MaxNopLength = 1;
  if (Subtarget->is64Bit()) {
    // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
    // IndexReg/BaseReg below need to be updated.
    if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
      MaxNopLength = 7;
    else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
      MaxNopLength = 15;
    else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
      MaxNopLength = 11;
    else
      MaxNopLength = 10;
  } else if (Subtarget->is32Bit())
    MaxNopLength = 2;

  // Cap a single nop emission at the profitable value for the target
  NumBytes = std::min(NumBytes, MaxNopLength);

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0:
660 llvm_unreachable("Zero nops?");
713 SegmentReg
= X86::CS
;
717 unsigned NumPrefixes
= std::min(NumBytes
- NopSize
, 5U);
718 NopSize
+= NumPrefixes
;
719 for (unsigned i
= 0; i
!= NumPrefixes
; ++i
)
720 OS
.emitBytes("\x66");
723 default: llvm_unreachable("Unexpected opcode");
725 OS
.emitInstruction(MCInstBuilder(Opc
), *Subtarget
);
728 OS
.emitInstruction(MCInstBuilder(Opc
).addReg(X86::AX
).addReg(X86::AX
),
733 OS
.emitInstruction(MCInstBuilder(Opc
)
737 .addImm(Displacement
)
742 assert(NopSize
<= NumBytes
&& "We overemitted?");

/// Emit the optimal amount of multi-byte nops on X86.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT.
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (const MachineOperand &MO :
       llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO))
      MI.addOperand(*MaybeOperand);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op =
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
  assert(std::next(MI.getIterator())->isCall() &&
         "KCFI_CHECK not followed by a call instruction");

  // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
  // returns a 1-byte X86::NOOP, which means the offset is the same in
  // bytes. This assumes that patchable-function-prefix is the same for all
  // functions.
  const MachineFunction &MF = *MI.getMF();
  int64_t PrefixNops = 0;
  (void)MF.getFunction()
      .getFnAttribute("patchable-function-prefix")
      .getValueAsString()
      .getAsInteger(10, PrefixNops);

  // KCFI allows indirect calls to any location that's preceded by a valid
  // type identifier. To avoid encoding the full constant into an instruction,
  // and thus emitting potential call target gadgets at each indirect call
  // site, load a negated constant to a register and compare that to the
  // expected value at the call target.
  const Register AddrReg = MI.getOperand(0).getReg();
  const uint32_t Type = MI.getOperand(1).getImm();
  // The check is immediately before the call. If the call target is in R10,
  // we can clobber R11 for the check instead.
  unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
  EmitAndCountInstruction(
      MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
  EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
                              .addReg(X86::NoRegister)
                              .addReg(TempReg)
                              .addReg(AddrReg)
                              .addImm(1)
                              .addReg(X86::NoRegister)
                              .addImm(-(PrefixNops + 4))
                              .addReg(X86::NoRegister));

  MCSymbol *Pass = OutContext.createTempSymbol();
  EmitAndCountInstruction(
      MCInstBuilder(X86::JCC_1)
          .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
          .addImm(X86::COND_E));

  MCSymbol *Trap = OutContext.createTempSymbol();
  OutStreamer->emitLabel(Trap);
  EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
  emitKCFITrapEntry(MF, Trap);
  OutStreamer->emitLabel(Pass);
}
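
// For illustration (assembly sketch, not from the original source): with no
// patchable prefix and the target in %rax, the emitted check looks roughly
// like
//   movl $-TYPE, %r10d     # negated expected type id
//   addl -4(%rax), %r10d   # add the id stored just before the target
//   je   .Lpass            # the sum is zero iff the ids match
// .Ltrap:
//   ud2                    # recorded in the KCFI trap section
// .Lpass:
//   callq *%rax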

void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
  // FIXME: Make this work on non-ELF.
  if (!TM.getTargetTriple().isOSBinFormatELF()) {
    report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
    return;
  }

  const auto &Reg = MI.getOperand(0).getReg();
  ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());

  uint64_t ShadowBase;
  int MappingScale;
  bool OrShadowOffset;
  getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
                            AccessInfo.CompileKernel, &ShadowBase,
                            &MappingScale, &OrShadowOffset);

  StringRef Name = AccessInfo.IsWrite ? "store" : "load";
  StringRef Op = OrShadowOffset ? "or" : "add";
  std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
                         Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
                         TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
                            .str();
  if (OrShadowOffset)
    report_fatal_error(
        "OrShadowOffset is not supported with optimized callbacks");

  EmitAndCountInstruction(
      MCInstBuilder(X86::CALL64pcrel32)
          .addExpr(MCSymbolRefExpr::create(
              OutContext.getOrCreateSymbol(SymName), OutContext)));
}
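
// For example (illustrative): a 4-byte store through %rdi with an "add"-style
// shadow mapping produces a call to a callback symbol named
// "__asan_check_store_add_4_RDI".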

void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize, opcode, operands

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  unsigned MinSize = MI.getOperand(0).getImm();
  unsigned Opcode = MI.getOperand(1).getImm();
  // Opcode PATCHABLE_OP is a special case: there is no instruction to wrap,
  // simply emit a nop of size MinSize.
  bool EmptyInst = (Opcode == TargetOpcode::PATCHABLE_OP);

  MCInst MCI;
  MCI.setOpcode(Opcode);
  for (auto &MO : drop_begin(MI.operands(), 2))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      MCI.addOperand(*MaybeOperand);

  SmallString<256> Code;
  if (!EmptyInst) {
    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
  }

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
      // For compatibility reasons, when targeting MSVC, it is important to
      // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
      // rely specifically on this pattern to be able to patch a function.
      // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
    } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
      // This is an optimization that lets us get away without emitting a nop
      // in many cases.
      //
      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes
      // two bytes too, so the check on MinSize is important.
      MCI.setOpcode(X86::PUSH64rmr);
    } else {
      unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void)NopSize;
    }
  }
  if (!EmptyInst)
    OutStreamer->emitInstruction(MCI, getSubtargetInfo());
}

// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);

  SM.recordStackMap(*MILabel, MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordPatchPoint(*MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type.");
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
                                           MCIL.GetSymbolFromOperand(CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used.
    Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(ScratchReg))
      EncodedBytes = 13;
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    // FIXME: Add retpoline support and remove this.
    if (Subtarget->useIndirectThunkCalls())
      report_fatal_error(
          "Lowering patchpoint with thunks not yet implemented.");
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding.
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");
  emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
}
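
// For reference (illustrative): the materialize-and-call form above is
//   movabsq $target, %r11   # 10 bytes
//   callq   *%r11           # 3 bytes with an extended scratch register
// which is why EncodedBytes is 12 or 13 depending on the scratch register
// chosen.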

void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supported on X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayCustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x0f");

  // The default C calling convention will place two arguments into %rdi and
  // %rsi -- so we only work with those.
  const Register DestRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};
  // Filled out in loop.
  Register SrcRegs[] = {0, 0};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the registers before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      assert(Op->isReg() && "Only support arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // Now that the register values are stashed, mov arguments into place.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten over the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray custom event end.");

  // Record the sled version. Version 0 of this sled was spelled differently, so
  // we let the runtime handle the different offsets we're using. Version 2
  // changed the absolute address to a PC-relative address.
  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
}
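
// Sizing note (illustrative, my accounting): the two-byte "jmp +15" skips
// exactly the sled body emitted above -- 4 bytes per argument (push+mov, or
// 4 nops) for two arguments, a 5-byte call, and 2 bytes of pop/nop restore --
// so the runtime can patch the jump without resizing anything.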

void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay typed events only supported on X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_typed_event_sled_N:
  //   jmp +N                       // jump across the instrumentation sled
  //   ...                          // set up arguments in register
  //   callq __xray_TypedEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayTypedEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
  OutStreamer->AddComment("# XRay Typed Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x14");

  // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
  bool UsedMask[] = {false, false, false};

  // Will fill out src regs in the loop.
  Register SrcRegs[] = {0, 0, 0};

  // Then we put the operands in the SystemV registers. We spill the values in
  // the registers before we clobber them, and mark them as used in UsedMask.
  // In case the arguments are already in the correct register, we emit nops
  // appropriately sized to keep the sled the same size in every situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      // TODO: Is register only support adequate?
      assert(Op->isReg() && "Only supports arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. Doing the actual
  // moving is postponed until after all the registers are stashed so nothing
  // is clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry about
  // emitting movs.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten over the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (UsedMask[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray typed event end.");

  // Record the sled version.
  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  const Function &F = MF->getFunction();
  if (F.hasFnAttribute("patchable-function-entry")) {
    unsigned Num;
    if (F.getFnAttribute("patchable-function-entry")
            .getValueAsString()
            .getAsInteger(10, Num))
      return;
    emitX86Nops(*OutStreamer, Num, Subtarget);
    return;
  }
  // We want to emit the following pattern:
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   jmp .tmpN
  //   # 9 bytes worth of noops
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>   // 6 bytes
  //   call <relative offset, 32-bits>   // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
}

void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Since PATCHABLE_RET takes the opcode of the return statement as an
  // argument, we use that to emit the correct form of the RET that we want.
  // i.e. when we see this:
  //
  //   PATCHABLE_RET X86::RET ...
  //
  // We should emit the RET followed by sleds.
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   ret  # or equivalent instruction
  //   # 10 bytes worth of noops
  //
  // This just makes sure that the alignment for the next instruction is 2.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  unsigned OpCode = MI.getOperand(0).getImm();
  MCInst Ret;
  Ret.setOpcode(OpCode);
  for (auto &MO : drop_begin(MI.operands()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      Ret.addOperand(*MaybeOperand);
  OutStreamer->emitInstruction(Ret, getSubtargetInfo());
  emitX86Nops(*OutStreamer, 10, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                             X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
  // instruction so we lower that particular instruction and its operands.
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
  // tail call much like how we have it in PATCHABLE_RET.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  OutStreamer->emitLabel(Target);
  recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);

  unsigned OpCode = MI.getOperand(0).getImm();
  OpCode = convertTailJumpOpcode(OpCode);
  MCInst TC;
  TC.setOpcode(OpCode);

  // Before emitting the instruction, add a comment to indicate that this is
  // indeed a tail call.
  OutStreamer->AddComment("TAILCALL");
  for (auto &MO : drop_begin(MI.operands()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      TC.addOperand(*MaybeOperand);
  OutStreamer->emitInstruction(TC, getSubtargetInfo());
}

// Returns instruction preceding MBBI in MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
static MachineBasicBlock::const_iterator
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
  const MachineBasicBlock *MBB = MBBI->getParent();
  while (MBBI == MBB->begin()) {
    if (MBB == &MBB->getParent()->front())
      return MachineBasicBlock::const_iterator();
    MBB = MBB->getPrevNode();
    MBBI = MBB->end();
  }
  --MBBI;
  return MBBI;
}

static const Constant *getConstantFromPool(const MachineInstr &MI,
                                           const MachineOperand &Op) {
  if (!Op.isCPI() || Op.getOffset() != 0)
    return nullptr;

  ArrayRef<MachineConstantPoolEntry> Constants =
      MI.getParent()->getParent()->getConstantPool()->getConstants();
  const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];

  // Bail if this is a machine constant pool entry, we won't be able to dig out
  // anything useful.
  if (ConstantEntry.isMachineConstantPoolEntry())
    return nullptr;

  return ConstantEntry.Val.ConstVal;
}

static std::string getShuffleComment(const MachineInstr *MI,
                                     unsigned SrcOp1Idx, unsigned SrcOp2Idx,
                                     ArrayRef<int> Mask) {
  std::string Comment;

  // Compute the name for a register. This is really goofy because we have
  // multiple instruction printers that could (in theory) use different
  // names. Fortunately most people use the ATT style (outside of Windows)
  // and they actually agree on register naming here. Ultimately, this is
  // a comment, and so it's OK if it isn't perfect.
  auto GetRegisterName = [](MCRegister Reg) -> StringRef {
    return X86ATTInstPrinter::getRegisterName(Reg);
  };

  const MachineOperand &DstOp = MI->getOperand(0);
  const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
  const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);

  StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
  StringRef Src1Name =
      SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
  StringRef Src2Name =
      SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";

  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask);
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  raw_string_ostream CS(Comment);
  CS << DstName;

  // Handle AVX512 MASK/MASKZ write mask comments.
  // MASK: zmmX {%kY}
  // MASKZ: zmmX {%kY} {z}
  if (SrcOp1Idx > 1) {
    assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");

    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
    if (WriteMaskOp.isReg()) {
      CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";

      if (SrcOp1Idx == 2) {
        CS << " {z}";
      }
    }
  }

  CS << " = ";

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << ",";
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero";
      continue;
    }

    // Otherwise, it must come from src1 or src2. Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u";
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }

  return Comment;
}
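
// Example output (illustrative): for a PSHUFB that keeps the low two elements
// and zeroes the rest, the generated comment reads
//   xmm0 = xmm0[0,1],zero,zero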

static void printConstant(const APInt &Val, raw_ostream &CS) {
  if (Val.getBitWidth() <= 64) {
    CS << Val.getZExtValue();
  } else {
    // print multi-word constant as (w0,w1)
    CS << "(";
    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
      if (i > 0)
        CS << ",";
      CS << Val.getRawData()[i];
    }
    CS << ")";
  }
}

static void printConstant(const APFloat &Flt, raw_ostream &CS) {
  SmallString<32> Str;
  // Force scientific notation to distinguish from integers.
  Flt.toString(Str, 0, 0);
  CS << Str;
}

static void printConstant(const Constant *COp, unsigned BitWidth,
                          raw_ostream &CS) {
  if (isa<UndefValue>(COp)) {
    CS << "u";
  } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
    printConstant(CI->getValue(), CS);
  } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
    printConstant(CF->getValueAPF(), CS);
  } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
    Type *EltTy = CDS->getElementType();
    bool IsInteger = EltTy->isIntegerTy();
    bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
    unsigned EltBits = EltTy->getPrimitiveSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements());
    assert((BitWidth % EltBits) == 0 && "Broadcast element size mismatch");
    for (unsigned I = 0; I != E; ++I) {
      if (I != 0)
        CS << ",";
      if (IsInteger)
        printConstant(CDS->getElementAsAPInt(I), CS);
      else if (IsFP)
        printConstant(CDS->getElementAsAPFloat(I), CS);
      else
        CS << "?";
    }
  } else {
    CS << "?";
  }
}

void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
  assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
  assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) &&
         "SEH_ instruction Windows and UEFI only");

  // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
  if (EmitFPOData) {
    X86TargetStreamer *XTS =
        static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
    switch (MI->getOpcode()) {
    case X86::SEH_PushReg:
      XTS->emitFPOPushReg(MI->getOperand(0).getImm());
      break;
    case X86::SEH_StackAlloc:
      XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
      break;
    case X86::SEH_StackAlign:
      XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
      break;
    case X86::SEH_SetFrame:
      assert(MI->getOperand(1).getImm() == 0 &&
             ".cv_fpo_setframe takes no offset");
      XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
      break;
    case X86::SEH_EndPrologue:
      XTS->emitFPOEndPrologue();
      break;
    case X86::SEH_SaveReg:
    case X86::SEH_SaveXMM:
    case X86::SEH_PushFrame:
      llvm_unreachable("SEH_ directive incompatible with FPO");
    default:
      llvm_unreachable("expected SEH_ instruction");
    }
    return;
  }

  // Otherwise, use the .seh_ directives for all other Windows platforms.
  switch (MI->getOpcode()) {
  case X86::SEH_PushReg:
    OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SaveReg:
    OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_SaveXMM:
    OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_StackAlloc:
    OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SetFrame:
    OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
                                    MI->getOperand(1).getImm());
    break;

  case X86::SEH_PushFrame:
    OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
    break;

  case X86::SEH_EndPrologue:
    OutStreamer->emitWinCFIEndProlog();
    break;

  default:
    llvm_unreachable("expected SEH_ instruction");
  }
}

static unsigned getRegisterWidth(const MCOperandInfo &Info) {
  if (Info.RegClass == X86::VR128RegClassID ||
      Info.RegClass == X86::VR128XRegClassID)
    return 128;
  if (Info.RegClass == X86::VR256RegClassID ||
      Info.RegClass == X86::VR256XRegClassID)
    return 256;
  if (Info.RegClass == X86::VR512RegClassID)
    return 512;
  llvm_unreachable("Unknown register class!");
}

static void addConstantComments(const MachineInstr *MI,
                                MCStreamer &OutStreamer) {
  switch (MI->getOpcode()) {
  // Lower PSHUFB and VPERMILP normally but add a comment if we can find
  // a constant shuffle mask. We won't be able to do this at the MC layer
  // because the mask isn't an immediate.
  case X86::PSHUFBrm:
  case X86::VPSHUFBrm:
  case X86::VPSHUFBYrm:
  case X86::VPSHUFBZ128rm:
  case X86::VPSHUFBZ128rmk:
  case X86::VPSHUFBZ128rmkz:
  case X86::VPSHUFBZ256rm:
  case X86::VPSHUFBZ256rmk:
  case X86::VPSHUFBZ256rmkz:
  case X86::VPSHUFBZrm:
  case X86::VPSHUFBZrmk:
  case X86::VPSHUFBZrmkz: {
    unsigned SrcIdx = 1;
    if (X86II::isKMasked(MI->getDesc().TSFlags)) {
      // Skip mask operand.
      ++SrcIdx;
      if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
        // Skip passthru operand.
        ++SrcIdx;
      }
    }
    unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;

    assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
           "Unexpected number of operands!");

    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 64> Mask;
      DecodePSHUFBMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }
  case X86::VPERMILPSrm:
  case X86::VPERMILPSYrm:
  case X86::VPERMILPSZ128rm:
  case X86::VPERMILPSZ128rmk:
  case X86::VPERMILPSZ128rmkz:
  case X86::VPERMILPSZ256rm:
  case X86::VPERMILPSZ256rmk:
  case X86::VPERMILPSZ256rmkz:
  case X86::VPERMILPSZrm:
  case X86::VPERMILPSZrmk:
  case X86::VPERMILPSZrmkz:
  case X86::VPERMILPDrm:
  case X86::VPERMILPDYrm:
  case X86::VPERMILPDZ128rm:
  case X86::VPERMILPDZ128rmk:
  case X86::VPERMILPDZ128rmkz:
  case X86::VPERMILPDZ256rm:
  case X86::VPERMILPDZ256rmk:
  case X86::VPERMILPDZ256rmkz:
  case X86::VPERMILPDZrm:
  case X86::VPERMILPDZrmk:
  case X86::VPERMILPDZrmkz: {
    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMILPSrm:
    case X86::VPERMILPSYrm:
    case X86::VPERMILPSZ128rm:
    case X86::VPERMILPSZ256rm:
    case X86::VPERMILPSZrm:
    case X86::VPERMILPSZ128rmkz:
    case X86::VPERMILPSZ256rmkz:
    case X86::VPERMILPSZrmkz:
    case X86::VPERMILPSZ128rmk:
    case X86::VPERMILPSZ256rmk:
    case X86::VPERMILPSZrmk:
      ElSize = 32;
      break;
    case X86::VPERMILPDrm:
    case X86::VPERMILPDYrm:
    case X86::VPERMILPDZ128rm:
    case X86::VPERMILPDZ256rm:
    case X86::VPERMILPDZrm:
    case X86::VPERMILPDZ128rmkz:
    case X86::VPERMILPDZ256rmkz:
    case X86::VPERMILPDZrmkz:
    case X86::VPERMILPDZ128rmk:
    case X86::VPERMILPDZ256rmk:
    case X86::VPERMILPDZrmk:
      ElSize = 64;
      break;
    }

    unsigned SrcIdx = 1;
    if (X86II::isKMasked(MI->getDesc().TSFlags)) {
      // Skip mask operand.
      ++SrcIdx;
      if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
        // Skip passthru operand.
        ++SrcIdx;
      }
    }
    unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;

    assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
           "Unexpected number of operands!");

    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMILPMask(C, ElSize, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
    }
    break;
  }
  case X86::VPERMIL2PDrm:
  case X86::VPERMIL2PSrm:
  case X86::VPERMIL2PDYrm:
  case X86::VPERMIL2PSYrm: {
    assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
           "Unexpected number of operands!");

    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
    if (!CtrlOp.isImm())
      break;

    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
    }

    const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }
  case X86::VPPERMrrm: {
    assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
           "Unexpected number of operands!");

    const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPPERMMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }
  case X86::MMX_MOVQ64rm: {
    assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
           "Unexpected number of operands!");
    if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      if (auto *CF = dyn_cast<ConstantFP>(C)) {
        CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
        OutStreamer.AddComment(CS.str());
      }
    }
    break;
  }
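
// The macros below enumerate every full-width vector load from memory that
// can source a constant pool entry: the SSE/AVX MOV[AU]P[SD]/MOVDQ[AU]
// family plus the AVX-512 Z/Z256/Z128 variants. CASE_ALL_MOV_RM() pastes all
// of them so a single case body can annotate each form.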
#define MOV_CASE(Prefix, Suffix)        \
  case X86::Prefix##MOVAPD##Suffix##rm: \
  case X86::Prefix##MOVAPS##Suffix##rm: \
  case X86::Prefix##MOVUPD##Suffix##rm: \
  case X86::Prefix##MOVUPS##Suffix##rm: \
  case X86::Prefix##MOVDQA##Suffix##rm: \
  case X86::Prefix##MOVDQU##Suffix##rm:

#define MOV_AVX512_CASE(Suffix)    \
  case X86::VMOVDQA64##Suffix##rm: \
  case X86::VMOVDQA32##Suffix##rm: \
  case X86::VMOVDQU64##Suffix##rm: \
  case X86::VMOVDQU32##Suffix##rm: \
  case X86::VMOVDQU16##Suffix##rm: \
  case X86::VMOVDQU8##Suffix##rm:  \
  case X86::VMOVAPS##Suffix##rm:   \
  case X86::VMOVAPD##Suffix##rm:   \
  case X86::VMOVUPS##Suffix##rm:   \
  case X86::VMOVUPD##Suffix##rm:

#define CASE_ALL_MOV_RM()      \
  MOV_CASE(, )   /* SSE */     \
  MOV_CASE(V, )  /* AVX-128 */ \
  MOV_CASE(V, Y) /* AVX-256 */ \
  MOV_AVX512_CASE(Z)           \
  MOV_AVX512_CASE(Z256)        \
  MOV_AVX512_CASE(Z128)
  // For loads from a constant pool to a vector register, print the constant
  // loaded.
  CASE_ALL_MOV_RM()
  case X86::VBROADCASTF128:
  case X86::VBROADCASTI128:
  case X86::VBROADCASTF32X4Z256rm:
  case X86::VBROADCASTF32X4rm:
  case X86::VBROADCASTF32X8rm:
  case X86::VBROADCASTF64X2Z128rm:
  case X86::VBROADCASTF64X2rm:
  case X86::VBROADCASTF64X4rm:
  case X86::VBROADCASTI32X4Z256rm:
  case X86::VBROADCASTI32X4rm:
  case X86::VBROADCASTI32X8rm:
  case X86::VBROADCASTI64X2Z128rm:
  case X86::VBROADCASTI64X2rm:
  case X86::VBROADCASTI64X4rm:
    assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
           "Unexpected number of operands!");
    if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
      int NumLanes = 1;
      // Override NumLanes for the broadcast instructions.
      switch (MI->getOpcode()) {
      case X86::VBROADCASTF128:        NumLanes = 2; break;
      case X86::VBROADCASTI128:        NumLanes = 2; break;
      case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
      case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
      case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
      case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
      case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
      case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
      case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
      case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
      case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
      case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
      case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
      case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
      }
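
      // NumLanes is a repetition count: for the broadcast forms the pool
      // entry holds a single 128- or 256-bit lane, so the printing loops
      // below emit its elements NumLanes times to show the full destination.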

      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
        CS << "[";
        for (int l = 0; l != NumLanes; ++l) {
          for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
               ++i) {
            if (i != 0 || l != 0)
              CS << ",";
            if (CDS->getElementType()->isIntegerTy())
              printConstant(CDS->getElementAsAPInt(i), CS);
            else if (CDS->getElementType()->isHalfTy() ||
                     CDS->getElementType()->isFloatTy() ||
                     CDS->getElementType()->isDoubleTy())
              printConstant(CDS->getElementAsAPFloat(i), CS);
            else
              CS << "?";
          }
        }
        CS << "]";
        OutStreamer.AddComment(CS.str());
      } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
        CS << "<";
        for (int l = 0; l != NumLanes; ++l) {
          for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
               ++i) {
            if (i != 0 || l != 0)
              CS << ",";
            printConstant(CV->getOperand(i),
                          CV->getType()->getPrimitiveSizeInBits(), CS);
          }
        }
        CS << ">";
        OutStreamer.AddComment(CS.str());
      }
    }
    break;
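
  // Illustrative output (values depend on the pool entry): a full-width load
  // above might be annotated "xmm0 = [1,2,3,4]"; the broadcast cases below
  // instead repeat one scalar, e.g. "xmm1 = [42,42,42,42]".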
  case X86::MOVDDUPrm:
  case X86::VMOVDDUPrm:
  case X86::VMOVDDUPZ128rm:
  case X86::VBROADCASTSSrm:
  case X86::VBROADCASTSSYrm:
  case X86::VBROADCASTSSZ128rm:
  case X86::VBROADCASTSSZ256rm:
  case X86::VBROADCASTSSZrm:
  case X86::VBROADCASTSDYrm:
  case X86::VBROADCASTSDZ256rm:
  case X86::VBROADCASTSDZrm:
  case X86::VPBROADCASTBrm:
  case X86::VPBROADCASTBYrm:
  case X86::VPBROADCASTBZ128rm:
  case X86::VPBROADCASTBZ256rm:
  case X86::VPBROADCASTBZrm:
  case X86::VPBROADCASTDrm:
  case X86::VPBROADCASTDYrm:
  case X86::VPBROADCASTDZ128rm:
  case X86::VPBROADCASTDZ256rm:
  case X86::VPBROADCASTDZrm:
  case X86::VPBROADCASTQrm:
  case X86::VPBROADCASTQYrm:
  case X86::VPBROADCASTQZ128rm:
  case X86::VPBROADCASTQZ256rm:
  case X86::VPBROADCASTQZrm:
  case X86::VPBROADCASTWrm:
  case X86::VPBROADCASTWYrm:
  case X86::VPBROADCASTWZ128rm:
  case X86::VPBROADCASTWZ256rm:
  case X86::VPBROADCASTWZrm:
    assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
           "Unexpected number of operands!");
    if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
      int NumElts, EltBits;
      switch (MI->getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::MOVDDUPrm:          NumElts = 2;  EltBits = 64; break;
      case X86::VMOVDDUPrm:         NumElts = 2;  EltBits = 64; break;
      case X86::VMOVDDUPZ128rm:     NumElts = 2;  EltBits = 64; break;
      case X86::VBROADCASTSSrm:     NumElts = 4;  EltBits = 32; break;
      case X86::VBROADCASTSSYrm:    NumElts = 8;  EltBits = 32; break;
      case X86::VBROADCASTSSZ128rm: NumElts = 4;  EltBits = 32; break;
      case X86::VBROADCASTSSZ256rm: NumElts = 8;  EltBits = 32; break;
      case X86::VBROADCASTSSZrm:    NumElts = 16; EltBits = 32; break;
      case X86::VBROADCASTSDYrm:    NumElts = 4;  EltBits = 64; break;
      case X86::VBROADCASTSDZ256rm: NumElts = 4;  EltBits = 64; break;
      case X86::VBROADCASTSDZrm:    NumElts = 8;  EltBits = 64; break;
      case X86::VPBROADCASTBrm:     NumElts = 16; EltBits = 8;  break;
      case X86::VPBROADCASTBYrm:    NumElts = 32; EltBits = 8;  break;
      case X86::VPBROADCASTBZ128rm: NumElts = 16; EltBits = 8;  break;
      case X86::VPBROADCASTBZ256rm: NumElts = 32; EltBits = 8;  break;
      case X86::VPBROADCASTBZrm:    NumElts = 64; EltBits = 8;  break;
      case X86::VPBROADCASTDrm:     NumElts = 4;  EltBits = 32; break;
      case X86::VPBROADCASTDYrm:    NumElts = 8;  EltBits = 32; break;
      case X86::VPBROADCASTDZ128rm: NumElts = 4;  EltBits = 32; break;
      case X86::VPBROADCASTDZ256rm: NumElts = 8;  EltBits = 32; break;
      case X86::VPBROADCASTDZrm:    NumElts = 16; EltBits = 32; break;
      case X86::VPBROADCASTQrm:     NumElts = 2;  EltBits = 64; break;
      case X86::VPBROADCASTQYrm:    NumElts = 4;  EltBits = 64; break;
      case X86::VPBROADCASTQZ128rm: NumElts = 2;  EltBits = 64; break;
      case X86::VPBROADCASTQZ256rm: NumElts = 4;  EltBits = 64; break;
      case X86::VPBROADCASTQZrm:    NumElts = 8;  EltBits = 64; break;
      case X86::VPBROADCASTWrm:     NumElts = 8;  EltBits = 16; break;
      case X86::VPBROADCASTWYrm:    NumElts = 16; EltBits = 16; break;
      case X86::VPBROADCASTWZ128rm: NumElts = 8;  EltBits = 16; break;
      case X86::VPBROADCASTWZ256rm: NumElts = 16; EltBits = 16; break;
      case X86::VPBROADCASTWZrm:    NumElts = 32; EltBits = 16; break;
      }

      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      CS << "[";
      for (int i = 0; i != NumElts; ++i) {
        if (i != 0)
          CS << ",";
        printConstant(C, EltBits, CS);
      }
      CS << "]";
      OutStreamer.AddComment(CS.str());
    }
  }
}
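
// emitInstruction is the X86 expansion point for MachineInstrs: pseudos get
// bespoke lowering in the switch below, and anything not handled there falls
// through to X86MCInstLower::Lower and is emitted via
// EmitAndCountInstruction, which also feeds the stackmap shadow tracker.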
void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                     Subtarget->getFeatureBits());

  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI =
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  if (MI->getOpcode() == X86::OR64rm) {
    for (auto &Opd : MI->operands()) {
      if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
                                "swift_async_extendedFramePointerFlags") {
        ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
      }
    }
  }

  // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
  // are compressed from EVEX encoding to VEX encoding.
  if (TM.Options.MCOptions.ShowMCEncoding) {
    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
  }

  // Add comments for values loaded from constant pool.
  if (OutStreamer->isVerboseAsm())
    addConstantComments(MI, *OutStreamer);

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }
  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::ENDBR32:
  case X86::ENDBR64: {
    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
    // non-empty. If MI is the initial ENDBR, place the
    // __patchable_function_entries label after ENDBR.
    if (CurrentPatchableFunctionEntrySym &&
        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
        MI == &MF->front().front()) {
      MCInst Inst;
      MCInstLowering.Lower(MI, Inst);
      EmitAndCountInstruction(Inst);
      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
      return;
    }
    break;
  }
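
  // The apparent intent of the special case above: by keeping the
  // __patchable_function_entries label after the ENDBR, runtime patching of
  // the entry does not overwrite the landing pad that indirect-branch
  // tracking (CET IBT) requires.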

  case X86::TAILJMPd64:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    [[fallthrough]];
  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("TAILCALL");
    break;

  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    return LowerTlsAddr(MCInstLowering, *MI);

  case X86::MOVPC32r: {
    // This is a pseudo op for a two instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do
    // a lot of extra uniquing.
    EmitAndCountInstruction(
        MCInstBuilder(X86::CALLpcrel32)
            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering *FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    int stackGrowth = -RI->getSlotSize();

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
      MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
    }

    // Emit the label.
    OutStreamer->emitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(
        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }
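
  // Illustrative expansion (AT&T syntax, PIC base popped into %esi):
  //     calll .L0$pb
  //   .L0$pb:
  //     popl  %esi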

  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //   EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and
    // refer to it.
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->emitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand
    // expression.
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);

    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                                .addReg(MI->getOperand(0).getReg())
                                .addReg(MI->getOperand(1).getReg())
                                .addExpr(DotExpr));
    return;
  }
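
  // Illustrative result: the add carries a symbolic fixup such as
  //     addl $MYGLOBAL+(.Ltmp0-.L0$pb), %eax
  // where .Ltmp0 is the DotSym label emitted just above.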
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::KCFI_CHECK:
    return LowerKCFI_CHECK(*MI);

  case X86::ASAN_CHECK_MEMACCESS:
    return LowerASAN_CHECK_MEMACCESS(*MI);

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
    return;

  case X86::SEH_PushReg:
  case X86::SEH_SaveReg:
  case X86::SEH_SaveXMM:
  case X86::SEH_StackAlloc:
  case X86::SEH_StackAlign:
  case X86::SEH_SetFrame:
  case X86::SEH_PushFrame:
  case X86::SEH_EndPrologue:
    EmitSEHInstruction(MI);
    return;

  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Pseudo instructions that aren't a call are assumed to not emit any
      // code. If they do, we worst case generate unnecessary noops after a
      // successful call.
      if (MBBI->isCall() || !MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }
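
  // The backward scan above crosses block boundaries via PrevCrossBBInst and
  // skips pseudos that emit no code; the nop is emitted only when the nearest
  // real instruction is a call, presumably so the return address does not
  // point straight at the epilogue and confuse the Win64 unwinder.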

  case X86::UBSAN_UD1:
    EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
                                .addReg(X86::EAX)
                                .addReg(X86::EAX)
                                .addImm(1)
                                .addReg(X86::NoRegister)
                                .addImm(MI->getOperand(0).getImm())
                                .addReg(X86::NoRegister));
    return;
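
  // The ud1 memory operand's displacement (operand 0 of the pseudo) encodes
  // the UBSan check kind, so a trap handler can recover it from the faulting
  // instruction; illustratively this prints as something like
  //   ud1l 11(%eax), %eax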

  case X86::CALL64pcrel32:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    break;
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns into
  // the stackmap shadow. The only way to achieve this is if the call is at
  // the end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call.
    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}