//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "X86AsmPrinter.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"

using namespace llvm;

namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                               const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace

/// A RAII helper which defines a region of instructions which can't have
/// padding added between them for correctness.
struct NoAutoPaddingScope {
  MCStreamer &OS;
  const bool OldAllowAutoPadding;
  NoAutoPaddingScope(MCStreamer &OS)
      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
  }
};

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);

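// The stackmap "shadow" is the byte region following a STACKMAP that must
// contain at least RequiredShadowSize bytes of real instructions or nops so
// that runtime patching of the stackmap cannot clobber code that may be
// branched to. count() credits each emitted instruction's encoded size
// against the currently open shadow region.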
void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                &MF->getSubtarget<X86Subtarget>());
  }
}

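// Forward an MCInst to the streamer and let the stackmap shadow tracker count
// its encoded size, so any open shadow region is credited with these bytes.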
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}

/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand changes the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    MachineModuleInfoCOFF &MMICOFF =
        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}

MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    RefKind = MCSymbolRefExpr::VK_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    RefKind = MCSymbolRefExpr::VK_SECREL;
    break;
  case X86II::MO_TLSGD:
    RefKind = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86II::MO_TLSLD:
    RefKind = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    RefKind = MCSymbolRefExpr::VK_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    RefKind = MCSymbolRefExpr::VK_GOT;
    break;
  case X86II::MO_GOTOFF:
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
    break;
  case X86II::MO_PLT:
    RefKind = MCSymbolRefExpr::VK_PLT;
    break;
  case X86II::MO_ABS8:
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}

std::optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return std::nullopt;
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return std::nullopt;
  }
}

// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
// information.
static unsigned convertTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    Opcode = X86::JMP32r;
    break;
  case X86::TAILJMPm:
    Opcode = X86::JMP32m;
    break;
  case X86::TAILJMPr64:
    Opcode = X86::JMP64r;
    break;
  case X86::TAILJMPm64:
    Opcode = X86::JMP64m;
    break;
  case X86::TAILJMPr64_REX:
    Opcode = X86::JMP64r_REX;
    break;
  case X86::TAILJMPm64_REX:
    Opcode = X86::JMP64m_REX;
    break;
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    Opcode = X86::JMP_1;
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    Opcode = X86::JCC_1;
    break;
  }

  return Opcode;
}

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(*MaybeMCOp);

  bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64r:
  case X86::LEA64_32r:
  case X86::LEA32r:
  case X86::LEA16r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;

  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }

  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }

  // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    if (In64BitMode)
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;

  case X86::BSF16rm:
  case X86::BSF16rr:
  case X86::BSF32rm:
  case X86::BSF32rr:
  case X86::BSF64rm:
  case X86::BSF64rr: {
    // Add a REP prefix to BSF instructions so that new processors can
    // recognize them as TZCNT, which has better performance than BSF.
    // BSF and TZCNT have different interpretations of the ZF bit, so make
    // sure it won't be used later.
    const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS);
    if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
      OutMI.setFlags(X86::IP_HAS_REPEAT);
    break;
  }
  }
}

void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
                  MI.getOpcode() != X86::TLS_base_addr32;
  bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
                      MI.getOpcode() == X86::TLS_base_addr64;
  MCContext &Ctx = OutStreamer->getContext();

  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

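  // For reference, the canonical 64-bit LP64 general-dynamic sequence emitted
  // below is roughly:
  //   data16 leaq sym@TLSGD(%rip), %rdi
  //   data16 data16 rex64 callq __tls_get_addr@PLT
  // (the prefixes pad the sequence to the size linkers expect for TLS
  // relaxation); the local-dynamic and 32-bit forms differ in relocation kind
  // and padding.
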
  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when GOTPCRELX becomes commonplace.
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getAsmInfo()->canRelaxRelocations();

  if (Is64Bits) {
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}

/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  // Determine the longest nop which can be efficiently decoded for the given
  // target cpu. 15-bytes is the longest single NOP instruction, but some
  // platforms can't decode the longest forms efficiently.
  unsigned MaxNopLength = 1;
  if (Subtarget->is64Bit()) {
    // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
    // IndexReg/BaseReg below need to be updated.
    if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
      MaxNopLength = 7;
    else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
      MaxNopLength = 15;
    else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
      MaxNopLength = 11;
    else
      MaxNopLength = 10;
  }
  if (Subtarget->is32Bit())
    MaxNopLength = 2;

  // Cap a single nop emission at the profitable value for the target
  NumBytes = std::min(NumBytes, MaxNopLength);

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0:
    llvm_unreachable("Zero nops?");
    break;
  case 1:
    NopSize = 1;
    Opc = X86::NOOP;
    break;
  case 2:
    NopSize = 2;
    Opc = X86::XCHG16ar;
    break;
  case 3:
    NopSize = 3;
    Opc = X86::NOOPL;
    break;
  case 4:
    NopSize = 4;
    Opc = X86::NOOPL;
    Displacement = 8;
    break;
  case 5:
    NopSize = 5;
    Opc = X86::NOOPL;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 6:
    NopSize = 6;
    Opc = X86::NOOPW;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 7:
    NopSize = 7;
    Opc = X86::NOOPL;
    Displacement = 512;
    break;
  case 8:
    NopSize = 8;
    Opc = X86::NOOPL;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  case 9:
    NopSize = 9;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  default:
    NopSize = 10;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    SegmentReg = X86::CS;
    break;
  }

  // Pad any remaining requested bytes with 0x66 operand-size prefixes.
  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.emitBytes("\x66");

  switch (Opc) {
  default: llvm_unreachable("Unexpected opcode");
  case X86::NOOP:
    OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
    break;
  case X86::XCHG16ar:
    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
                       *Subtarget);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    OS.emitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       *Subtarget);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}

/// Emit the optimal amount of multi-byte nops on X86.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit the call.
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT.
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (const MachineOperand &MO :
       llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO))
      MI.addOperand(*MaybeOperand);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op =
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
  assert(std::next(MI.getIterator())->isCall() &&
         "KCFI_CHECK not followed by a call instruction");

  // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
  // returns a 1-byte X86::NOOP, which means the offset is the same in
  // bytes. This assumes that patchable-function-prefix is the same for all
  // functions.
  const MachineFunction &MF = *MI.getMF();
  int64_t PrefixNops = 0;
  (void)MF.getFunction()
      .getFnAttribute("patchable-function-prefix")
      .getValueAsString()
      .getAsInteger(10, PrefixNops);

  // KCFI allows indirect calls to any location that's preceded by a valid
  // type identifier. To avoid encoding the full constant into an instruction,
  // and thus emitting potential call target gadgets at each indirect call
  // site, load a negated constant to a register and compare that to the
  // expected value at the call target.
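  // The emitted check is roughly (assuming the call target is in %rax and no
  // patchable-function-prefix nops):
  //   movl $<-type>, %r10d
  //   addl -4(%rax), %r10d   # type hash stored immediately before the target
  //   je   .Lpass
  //   ud2                    # recorded as a KCFI trap entry
  // .Lpass: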
  const Register AddrReg = MI.getOperand(0).getReg();
  const uint32_t Type = MI.getOperand(1).getImm();
  // The check is immediately before the call. If the call target is in R10,
  // we can clobber R11 for the check instead.
  unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
  EmitAndCountInstruction(
      MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
  EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
                              .addReg(X86::NoRegister)
                              .addReg(TempReg)
                              .addReg(AddrReg)
                              .addImm(1)
                              .addReg(X86::NoRegister)
                              .addImm(-(PrefixNops + 4))
                              .addReg(X86::NoRegister));

  MCSymbol *Pass = OutContext.createTempSymbol();
  EmitAndCountInstruction(
      MCInstBuilder(X86::JCC_1)
          .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
          .addImm(X86::COND_E));

  MCSymbol *Trap = OutContext.createTempSymbol();
  OutStreamer->emitLabel(Trap);
  EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
  emitKCFITrapEntry(MF, Trap);
  OutStreamer->emitLabel(Pass);
}

void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
  // FIXME: Make this work on non-ELF.
  if (!TM.getTargetTriple().isOSBinFormatELF()) {
    report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
    return;
  }

  const auto &Reg = MI.getOperand(0).getReg();
  ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());

  uint64_t ShadowBase;
  int MappingScale;
  bool OrShadowOffset;
  getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
                            AccessInfo.CompileKernel, &ShadowBase,
                            &MappingScale, &OrShadowOffset);

  StringRef Name = AccessInfo.IsWrite ? "store" : "load";
  StringRef Op = OrShadowOffset ? "or" : "add";
  std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
                         Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
                         TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
                            .str();
  if (OrShadowOffset)
    report_fatal_error(
        "OrShadowOffset is not supported with optimized callbacks");

  EmitAndCountInstruction(
      MCInstBuilder(X86::CALL64pcrel32)
          .addExpr(MCSymbolRefExpr::create(
              OutContext.getOrCreateSymbol(SymName), OutContext)));
}

void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto NextMI = std::find_if(std::next(MI.getIterator()),
                             MI.getParent()->end().getInstrIterator(),
                             [](auto &II) { return !II.isMetaInstruction(); });

  SmallString<256> Code;
  unsigned MinSize = MI.getOperand(0).getImm();

  if (NextMI != MI.getParent()->end()) {
    // Lower the next MachineInstr to find its byte size.
    MCInst MCI;
    MCIL.Lower(&*NextMI, MCI);

    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
  }

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
      // For compatibility reasons, when targeting MSVC, it is important to
      // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some
      // tools rely specifically on this pattern to be able to patch a
      // function. This is only for 32-bit targets, when using /arch:IA32 or
      // /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
    } else {
      unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void)NopSize;
    }
  }
}

// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);

  SM.recordStackMap(*MILabel, MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordPatchPoint(*MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type.");
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
                                           MCIL.GetSymbolFromOperand(CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used.
    Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(ScratchReg))
      EncodedBytes = 13;
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    // FIXME: Add retpoline support and remove this.
    if (Subtarget->useIndirectThunkCalls())
      report_fatal_error(
          "Lowering patchpoint with thunks not yet implemented.");
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding.
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");

  emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
}

void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayCustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x0f");

  // The default C calling convention will place two arguments into %rcx and
  // %rdx -- so we only work with those.
  const Register DestRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};
  // Filled out in loop.
  Register SrcRegs[] = {0, 0};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the register before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      assert(Op->isReg() && "Only support arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // Now that the register values are stashed, mov arguments into place.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray custom event end.");

  // Record the sled version. Version 0 of this sled was spelled differently, so
  // we let the runtime handle the different offsets we're using. Version 2
  // changed the absolute address to a PC-relative address.
  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                       // jump across the instrumentation sled
  //   ...                          // set up arguments in register
  //   callq __xray_TypedEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayTypedEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
  OutStreamer->AddComment("# XRay Typed Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x14");

  // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
  bool UsedMask[] = {false, false, false};

  // Will fill out src regs in the loop.
  Register SrcRegs[] = {0, 0, 0};

  // Then we put the operands in the SystemV registers. We spill the values in
  // the registers before we clobber them, and mark them as used in UsedMask.
  // In case the arguments are already in the correct register, we emit nops
  // appropriately sized to keep the sled the same size in every situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      // TODO: Is register-only support adequate?
      assert(Op->isReg() && "Only supports arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. Doing the actual
  // moving is postponed until after all the registers are stashed so nothing
  // is clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry about
  // emitting movs.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray typed event end.");

  // Record the sled version.
  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  const Function &F = MF->getFunction();
  if (F.hasFnAttribute("patchable-function-entry")) {
    unsigned Num;
    if (F.getFnAttribute("patchable-function-entry")
            .getValueAsString()
            .getAsInteger(10, Num))
      return;
    emitX86Nops(*OutStreamer, Num, Subtarget);
    return;
  }
  // We want to emit the following pattern:
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   jmp .tmpN
  //   # 9 bytes worth of noops
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>   // 6 bytes
  //   call <relative offset, 32-bits>   // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
}

void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Since PATCHABLE_RET takes the opcode of the return statement as an
  // argument, we use that to emit the correct form of the RET that we want.
  // i.e. when we see this:
  //
  //   PATCHABLE_RET X86::RET ...
  //
  // We should emit the RET followed by sleds.
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   ret  # or equivalent instruction
  //   # 10 bytes worth of noops
  //
  // This just makes sure that the alignment for the next instruction is 2.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  unsigned OpCode = MI.getOperand(0).getImm();
  MCInst Ret;
  Ret.setOpcode(OpCode);
  for (auto &MO : drop_begin(MI.operands()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      Ret.addOperand(*MaybeOperand);
  OutStreamer->emitInstruction(Ret, getSubtargetInfo());
  emitX86Nops(*OutStreamer, 10, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                             X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
  // instruction so we lower that particular instruction and its operands.
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
  // tail call much like how we have it in PATCHABLE_RET.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  OutStreamer->emitLabel(Target);
  recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);

  unsigned OpCode = MI.getOperand(0).getImm();
  OpCode = convertTailJumpOpcode(OpCode);
  MCInst TC;
  TC.setOpcode(OpCode);

  // Before emitting the instruction, add a comment to indicate that this is
  // indeed a tail call.
  OutStreamer->AddComment("TAILCALL");
  for (auto &MO : drop_begin(MI.operands()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      TC.addOperand(*MaybeOperand);
  OutStreamer->emitInstruction(TC, getSubtargetInfo());
}

// Returns instruction preceding MBBI in MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
static MachineBasicBlock::const_iterator
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
  const MachineBasicBlock *MBB = MBBI->getParent();
  while (MBBI == MBB->begin()) {
    if (MBB == &MBB->getParent()->front())
      return MachineBasicBlock::const_iterator();
    MBB = MBB->getPrevNode();
    MBBI = MBB->end();
  }
  --MBBI;
  return MBBI;
}

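// Build a human-readable shuffle decode for an asm comment, e.g. something
// like "xmm0 = xmm1[0],zero,zero,zero" (the exact text depends on the mask
// and the instruction's operands).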
static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
                                     unsigned SrcOp2Idx, ArrayRef<int> Mask) {
  std::string Comment;

  // Compute the name for a register. This is really goofy because we have
  // multiple instruction printers that could (in theory) use different
  // names. Fortunately most people use the ATT style (outside of Windows)
  // and they actually agree on register naming here. Ultimately, this is
  // a comment, and so it's OK if it isn't perfect.
  auto GetRegisterName = [](MCRegister Reg) -> StringRef {
    return X86ATTInstPrinter::getRegisterName(Reg);
  };

  const MachineOperand &DstOp = MI->getOperand(0);
  const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
  const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);

  StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
  StringRef Src1Name =
      SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
  StringRef Src2Name =
      SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";

  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask);
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  raw_string_ostream CS(Comment);
  CS << DstName;

  // Handle AVX512 MASK/MASKZ write mask comments.
  // MASK: zmmX {%kY}
  // MASKZ: zmmX {%kY} {z}
  if (SrcOp1Idx > 1) {
    assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");

    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
    if (WriteMaskOp.isReg()) {
      CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";

      if (SrcOp1Idx == 2) {
        CS << " {z}";
      }
    }
  }

  CS << " = ";

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << ",";
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero";
      continue;
    }

    // Otherwise, it must come from src1 or src2. Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u";
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }

  return Comment;
}

static void printConstant(const APInt &Val, raw_ostream &CS,
                          bool PrintZero = false) {
  if (Val.getBitWidth() <= 64) {
    CS << (PrintZero ? 0ULL : Val.getZExtValue());
  } else {
    // print multi-word constant as (w0,w1)
    CS << "(";
    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
      if (i > 0)
        CS << ",";
      CS << (PrintZero ? 0ULL : Val.getRawData()[i]);
    }
    CS << ")";
  }
}

static void printConstant(const APFloat &Flt, raw_ostream &CS,
                          bool PrintZero = false) {
  SmallString<32> Str;
  // Force scientific notation to distinguish from integers.
  if (PrintZero)
    APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0);
  else
    Flt.toString(Str, 0, 0);
  CS << Str;
}

static void printConstant(const Constant *COp, unsigned BitWidth,
                          raw_ostream &CS, bool PrintZero = false) {
  if (isa<UndefValue>(COp)) {
    CS << "u";
  } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
    printConstant(CI->getValue(), CS, PrintZero);
  } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
    printConstant(CF->getValueAPF(), CS, PrintZero);
  } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
    Type *EltTy = CDS->getElementType();
    bool IsInteger = EltTy->isIntegerTy();
    bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
    unsigned EltBits = EltTy->getPrimitiveSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements());
    assert((BitWidth % EltBits) == 0 && "Element size mismatch");
    for (unsigned I = 0; I != E; ++I) {
      if (I != 0)
        CS << ",";
      if (IsInteger)
        printConstant(CDS->getElementAsAPInt(I), CS, PrintZero);
      else if (IsFP)
        printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero);
      else
        CS << "?";
    }
  } else if (auto *CV = dyn_cast<ConstantVector>(COp)) {
    unsigned EltBits = CV->getType()->getScalarSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CV->getNumOperands());
    assert((BitWidth % EltBits) == 0 && "Element size mismatch");
    for (unsigned I = 0; I != E; ++I) {
      if (I != 0)
        CS << ",";
      printConstant(CV->getOperand(I), EltBits, CS, PrintZero);
    }
  } else {
    CS << "?";
  }
}

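// Print a comment for a scalar load that zeroes the upper vector elements:
// if the source is a constant-pool constant, print its value followed by
// zeros; otherwise fall back to the provided shuffle-style comment.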
static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer,
                               int SclWidth, int VecWidth,
                               const char *ShuffleComment) {
  std::string Comment;
  raw_string_ostream CS(Comment);
  const MachineOperand &DstOp = MI->getOperand(0);
  CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";

  if (auto *C = X86::getConstantFromPool(*MI, 1)) {
    CS << "[";
    printConstant(C, SclWidth, CS);
    for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
      CS << ",";
      printConstant(C, SclWidth, CS, true);
    }
    CS << "]";
    OutStreamer.AddComment(CS.str());
    return; // early-out
  }

  // We didn't find a constant load, fallback to a shuffle mask decode.
  CS << ShuffleComment;
  OutStreamer.AddComment(CS.str());
}

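// Print a comment for a broadcast-style load from the constant pool: the
// BitWidth-bit constant is printed Repeats times to mirror the splat.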
static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
                           int Repeats, int BitWidth) {
  if (auto *C = X86::getConstantFromPool(*MI, 1)) {
    std::string Comment;
    raw_string_ostream CS(Comment);
    const MachineOperand &DstOp = MI->getOperand(0);
    CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
    CS << "[";
    for (int l = 0; l != Repeats; ++l) {
      if (l != 0)
        CS << ",";
      printConstant(C, BitWidth, CS);
    }
    CS << "]";
    OutStreamer.AddComment(CS.str());
  }
}

void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
  assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
  assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) &&
         "SEH_ instruction Windows and UEFI only");

  // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
  if (EmitFPOData) {
    X86TargetStreamer *XTS =
        static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
    switch (MI->getOpcode()) {
    case X86::SEH_PushReg:
      XTS->emitFPOPushReg(MI->getOperand(0).getImm());
      break;
    case X86::SEH_StackAlloc:
      XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
      break;
    case X86::SEH_StackAlign:
      XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
      break;
    case X86::SEH_SetFrame:
      assert(MI->getOperand(1).getImm() == 0 &&
             ".cv_fpo_setframe takes no offset");
      XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
      break;
    case X86::SEH_EndPrologue:
      XTS->emitFPOEndPrologue();
      break;
    case X86::SEH_SaveReg:
    case X86::SEH_SaveXMM:
    case X86::SEH_PushFrame:
      llvm_unreachable("SEH_ directive incompatible with FPO");
      break;
    default:
      llvm_unreachable("expected SEH_ instruction");
      break;
    }
    return;
  }

  // Otherwise, use the .seh_ directives for all other Windows platforms.
  switch (MI->getOpcode()) {
  case X86::SEH_PushReg:
    OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SaveReg:
    OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_SaveXMM:
    OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_StackAlloc:
    OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SetFrame:
    OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
                                    MI->getOperand(1).getImm());
    break;

  case X86::SEH_PushFrame:
    OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
    break;

  case X86::SEH_EndPrologue:
    OutStreamer->emitWinCFIEndProlog();
    break;

  default:
    llvm_unreachable("expected SEH_ instruction");
  }
}

static unsigned getRegisterWidth(const MCOperandInfo &Info) {
  if (Info.RegClass == X86::VR128RegClassID ||
      Info.RegClass == X86::VR128XRegClassID)
    return 128;
  if (Info.RegClass == X86::VR256RegClassID ||
      Info.RegClass == X86::VR256XRegClassID)
    return 256;
  if (Info.RegClass == X86::VR512RegClassID)
    return 512;
  llvm_unreachable("Unknown register class!");
}

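// Attach asm comments describing constant-pool operands of MI, when the
// constant can be recovered. For example, a PSHUFB with a constant mask may
// get a comment along the lines of "xmm0 = xmm0[2,3,0,1]" (illustrative).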
1673 static void addConstantComments(const MachineInstr
*MI
,
1674 MCStreamer
&OutStreamer
) {
1675 switch (MI
->getOpcode()) {
1676 // Lower PSHUFB and VPERMILP normally but add a comment if we can find
1677 // a constant shuffle mask. We won't be able to do this at the MC layer
1678 // because the mask isn't an immediate.
1680 case X86::VPSHUFBrm
:
1681 case X86::VPSHUFBYrm
:
1682 case X86::VPSHUFBZ128rm
:
1683 case X86::VPSHUFBZ128rmk
:
1684 case X86::VPSHUFBZ128rmkz
:
1685 case X86::VPSHUFBZ256rm
:
1686 case X86::VPSHUFBZ256rmk
:
1687 case X86::VPSHUFBZ256rmkz
:
1688 case X86::VPSHUFBZrm
:
1689 case X86::VPSHUFBZrmk
:
1690 case X86::VPSHUFBZrmkz
: {
1691 unsigned SrcIdx
= 1;
1692 if (X86II::isKMasked(MI
->getDesc().TSFlags
)) {
1693 // Skip mask operand.
1695 if (X86II::isKMergeMasked(MI
->getDesc().TSFlags
)) {
1696 // Skip passthru operand.
1701 if (auto *C
= X86::getConstantFromPool(*MI
, SrcIdx
+ 1)) {
1702 unsigned Width
= getRegisterWidth(MI
->getDesc().operands()[0]);
1703 SmallVector
<int, 64> Mask
;
1704 DecodePSHUFBMask(C
, Width
, Mask
);
1706 OutStreamer
.AddComment(getShuffleComment(MI
, SrcIdx
, SrcIdx
, Mask
));
1711 case X86::VPERMILPSrm
:
1712 case X86::VPERMILPSYrm
:
1713 case X86::VPERMILPSZ128rm
:
1714 case X86::VPERMILPSZ128rmk
:
1715 case X86::VPERMILPSZ128rmkz
:
1716 case X86::VPERMILPSZ256rm
:
1717 case X86::VPERMILPSZ256rmk
:
1718 case X86::VPERMILPSZ256rmkz
:
1719 case X86::VPERMILPSZrm
:
1720 case X86::VPERMILPSZrmk
:
1721 case X86::VPERMILPSZrmkz
:
1722 case X86::VPERMILPDrm
:
1723 case X86::VPERMILPDYrm
:
1724 case X86::VPERMILPDZ128rm
:
1725 case X86::VPERMILPDZ128rmk
:
1726 case X86::VPERMILPDZ128rmkz
:
1727 case X86::VPERMILPDZ256rm
:
1728 case X86::VPERMILPDZ256rmk
:
1729 case X86::VPERMILPDZ256rmkz
:
1730 case X86::VPERMILPDZrm
:
1731 case X86::VPERMILPDZrmk
:
1732 case X86::VPERMILPDZrmkz
: {
1734 switch (MI
->getOpcode()) {
1735 default: llvm_unreachable("Invalid opcode");
1736 case X86::VPERMILPSrm
:
1737 case X86::VPERMILPSYrm
:
1738 case X86::VPERMILPSZ128rm
:
1739 case X86::VPERMILPSZ256rm
:
1740 case X86::VPERMILPSZrm
:
1741 case X86::VPERMILPSZ128rmkz
:
1742 case X86::VPERMILPSZ256rmkz
:
1743 case X86::VPERMILPSZrmkz
:
1744 case X86::VPERMILPSZ128rmk
:
1745 case X86::VPERMILPSZ256rmk
:
1746 case X86::VPERMILPSZrmk
:
1749 case X86::VPERMILPDrm
:
1750 case X86::VPERMILPDYrm
:
1751 case X86::VPERMILPDZ128rm
:
1752 case X86::VPERMILPDZ256rm
:
1753 case X86::VPERMILPDZrm
:
1754 case X86::VPERMILPDZ128rmkz
:
1755 case X86::VPERMILPDZ256rmkz
:
1756 case X86::VPERMILPDZrmkz
:
1757 case X86::VPERMILPDZ128rmk
:
1758 case X86::VPERMILPDZ256rmk
:
1759 case X86::VPERMILPDZrmk
:
1764 unsigned SrcIdx
= 1;
1765 if (X86II::isKMasked(MI
->getDesc().TSFlags
)) {
1766 // Skip mask operand.
1768 if (X86II::isKMergeMasked(MI
->getDesc().TSFlags
)) {
1769 // Skip passthru operand.
1774 if (auto *C
= X86::getConstantFromPool(*MI
, SrcIdx
+ 1)) {
1775 unsigned Width
= getRegisterWidth(MI
->getDesc().operands()[0]);
1776 SmallVector
<int, 16> Mask
;
1777 DecodeVPERMILPMask(C
, ElSize
, Width
, Mask
);
1779 OutStreamer
.AddComment(getShuffleComment(MI
, SrcIdx
, SrcIdx
, Mask
));
1784 case X86::VPERMIL2PDrm
:
1785 case X86::VPERMIL2PSrm
:
1786 case X86::VPERMIL2PDYrm
:
1787 case X86::VPERMIL2PSYrm
: {
1788 assert(MI
->getNumOperands() >= (3 + X86::AddrNumOperands
+ 1) &&
1789 "Unexpected number of operands!");
1791 const MachineOperand
&CtrlOp
= MI
->getOperand(MI
->getNumOperands() - 1);
1792 if (!CtrlOp
.isImm())
1796 switch (MI
->getOpcode()) {
1797 default: llvm_unreachable("Invalid opcode");
1798 case X86::VPERMIL2PSrm
: case X86::VPERMIL2PSYrm
: ElSize
= 32; break;
1799 case X86::VPERMIL2PDrm
: case X86::VPERMIL2PDYrm
: ElSize
= 64; break;
1802 if (auto *C
= X86::getConstantFromPool(*MI
, 3)) {
1803 unsigned Width
= getRegisterWidth(MI
->getDesc().operands()[0]);
1804 SmallVector
<int, 16> Mask
;
1805 DecodeVPERMIL2PMask(C
, (unsigned)CtrlOp
.getImm(), ElSize
, Width
, Mask
);
1807 OutStreamer
.AddComment(getShuffleComment(MI
, 1, 2, Mask
));
  case X86::VPPERMrrm: {
    if (auto *C = X86::getConstantFromPool(*MI, 3)) {
      unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
      SmallVector<int, 16> Mask;
      DecodeVPPERMMask(C, Width, Mask);
      if (!Mask.empty())
        OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
    }
    break;
  }
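  // For an MMX load of a floating-point constant, print the raw bit pattern
  // in hex (e.g. "mm0 = 0x3ff0000000000000") rather than a shuffle mask.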
  case X86::MMX_MOVQ64rm: {
    if (auto *C = X86::getConstantFromPool(*MI, 1)) {
      std::string Comment;
      raw_string_ostream CS(Comment);
      const MachineOperand &DstOp = MI->getOperand(0);
      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
      if (auto *CF = dyn_cast<ConstantFP>(C)) {
        CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
        OutStreamer.AddComment(CS.str());
      }
    }
    break;
  }
  case X86::VMOVSDZrm:
  case X86::MOVSDrm_alt:
  case X86::VMOVSDrm_alt:
  case X86::VMOVSDZrm_alt:
  case X86::MOVQI2PQIrm:
  case X86::VMOVQI2PQIrm:
  case X86::VMOVQI2PQIZrm:
    printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero");
    break;

  case X86::VMOVSSZrm:
  case X86::MOVSSrm_alt:
  case X86::VMOVSSrm_alt:
  case X86::VMOVSSZrm_alt:
  case X86::MOVDI2PDIrm:
  case X86::VMOVDI2PDIrm:
  case X86::VMOVDI2PDIZrm:
    printZeroUpperMove(MI, OutStreamer, 32, 128, "mem[0],zero,zero,zero");
    break;
#define MOV_CASE(Prefix, Suffix)        \
  case X86::Prefix##MOVAPD##Suffix##rm: \
  case X86::Prefix##MOVAPS##Suffix##rm: \
  case X86::Prefix##MOVUPD##Suffix##rm: \
  case X86::Prefix##MOVUPS##Suffix##rm: \
  case X86::Prefix##MOVDQA##Suffix##rm: \
  case X86::Prefix##MOVDQU##Suffix##rm:

#define MOV_AVX512_CASE(Suffix)    \
  case X86::VMOVDQA64##Suffix##rm: \
  case X86::VMOVDQA32##Suffix##rm: \
  case X86::VMOVDQU64##Suffix##rm: \
  case X86::VMOVDQU32##Suffix##rm: \
  case X86::VMOVDQU16##Suffix##rm: \
  case X86::VMOVDQU8##Suffix##rm:  \
  case X86::VMOVAPS##Suffix##rm:   \
  case X86::VMOVAPD##Suffix##rm:   \
  case X86::VMOVUPS##Suffix##rm:   \
  case X86::VMOVUPD##Suffix##rm:

#define CASE_128_MOV_RM()      \
  MOV_CASE(, )   /* SSE */     \
  MOV_CASE(V, )  /* AVX-128 */ \
  MOV_AVX512_CASE(Z128)

#define CASE_256_MOV_RM()      \
  MOV_CASE(V, Y) /* AVX-256 */ \
  MOV_AVX512_CASE(Z256)
#define CASE_512_MOV_RM()      \
  MOV_CASE(V, Z) /* AVX-512 */ \
  MOV_AVX512_CASE(Z)

  // For loads from a constant pool to a vector register, print the constant
  // loaded.
  CASE_128_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 128);
    break;
  CASE_256_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 256);
    break;
  CASE_512_MOV_RM()
    printBroadcast(MI, OutStreamer, 1, 512);
    break;
  case X86::VBROADCASTF128rm:
  case X86::VBROADCASTI128rm:
  case X86::VBROADCASTF32X4Z256rm:
  case X86::VBROADCASTF64X2Z128rm:
  case X86::VBROADCASTI32X4Z256rm:
  case X86::VBROADCASTI64X2Z128rm:
    printBroadcast(MI, OutStreamer, 2, 128);
    break;
  case X86::VBROADCASTF32X4rm:
  case X86::VBROADCASTF64X2rm:
  case X86::VBROADCASTI32X4rm:
  case X86::VBROADCASTI64X2rm:
    printBroadcast(MI, OutStreamer, 4, 128);
    break;
  case X86::VBROADCASTF32X8rm:
  case X86::VBROADCASTF64X4rm:
  case X86::VBROADCASTI32X8rm:
  case X86::VBROADCASTI64X4rm:
    printBroadcast(MI, OutStreamer, 2, 256);
    break;
  // For broadcast loads from a constant pool to a vector register, repeatedly
  // print the constant loaded.
  case X86::MOVDDUPrm:
  case X86::VMOVDDUPrm:
  case X86::VMOVDDUPZ128rm:
  case X86::VPBROADCASTQrm:
  case X86::VPBROADCASTQZ128rm:
    printBroadcast(MI, OutStreamer, 2, 64);
    break;
  case X86::VBROADCASTSDYrm:
  case X86::VBROADCASTSDZ256rm:
  case X86::VPBROADCASTQYrm:
  case X86::VPBROADCASTQZ256rm:
    printBroadcast(MI, OutStreamer, 4, 64);
    break;
  case X86::VBROADCASTSDZrm:
  case X86::VPBROADCASTQZrm:
    printBroadcast(MI, OutStreamer, 8, 64);
    break;
  case X86::VBROADCASTSSrm:
  case X86::VBROADCASTSSZ128rm:
  case X86::VPBROADCASTDrm:
  case X86::VPBROADCASTDZ128rm:
    printBroadcast(MI, OutStreamer, 4, 32);
    break;
  case X86::VBROADCASTSSYrm:
  case X86::VBROADCASTSSZ256rm:
  case X86::VPBROADCASTDYrm:
  case X86::VPBROADCASTDZ256rm:
    printBroadcast(MI, OutStreamer, 8, 32);
    break;
  case X86::VBROADCASTSSZrm:
  case X86::VPBROADCASTDZrm:
    printBroadcast(MI, OutStreamer, 16, 32);
    break;
  case X86::VPBROADCASTWrm:
  case X86::VPBROADCASTWZ128rm:
    printBroadcast(MI, OutStreamer, 8, 16);
    break;
  case X86::VPBROADCASTWYrm:
  case X86::VPBROADCASTWZ256rm:
    printBroadcast(MI, OutStreamer, 16, 16);
    break;
  case X86::VPBROADCASTWZrm:
    printBroadcast(MI, OutStreamer, 32, 16);
    break;
  case X86::VPBROADCASTBrm:
  case X86::VPBROADCASTBZ128rm:
    printBroadcast(MI, OutStreamer, 16, 8);
    break;
  case X86::VPBROADCASTBYrm:
  case X86::VPBROADCASTBZ256rm:
    printBroadcast(MI, OutStreamer, 32, 8);
    break;
  case X86::VPBROADCASTBZrm:
    printBroadcast(MI, OutStreamer, 64, 8);
    break;
  }
}
void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                     Subtarget->getFeatureBits());

  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI =
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
  if (MI->getOpcode() == X86::OR64rm) {
    for (auto &Opd : MI->operands()) {
      if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
                                "swift_async_extendedFramePointerFlags") {
        ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
      }
    }
  }
  // Add comments for values loaded from constant pool.
  if (OutStreamer->isVerboseAsm())
    addConstantComments(MI, *OutStreamer);
  // Add a comment about EVEX compression.
  if (TM.Options.MCOptions.ShowMCEncoding) {
    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
      OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
      OutStreamer->AddComment("EVEX TO EVEX Compression ", false);
  }
  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }
  case X86::ENDBR64: {
    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
    // non-empty. If MI is the initial ENDBR, place the
    // __patchable_function_entries label after ENDBR.
    if (CurrentPatchableFunctionEntrySym &&
        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
        MI == &MF->front().front()) {
      MCInst Inst;
      MCInstLowering.Lower(MI, Inst);
      EmitAndCountInstruction(Inst);
      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
      return;
    }
    break;
  }
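  // If requested, prefix the indirect tail call through R11 with a CS
  // segment-override prefix, then fall through to the common tail-call
  // handling below.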
  case X86::TAILJMPd64:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    [[fallthrough]];
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("TAILCALL");
    break;
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    return LowerTlsAddr(MCInstLowering, *MI);
  case X86::MOVPC32r: {
    // This is a pseudo op for a two instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do
    // a lot of extra uniquing.
    EmitAndCountInstruction(
        MCInstBuilder(X86::CALLpcrel32)
            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering *FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    int stackGrowth = -RI->getSlotSize();

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
      MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
    }

    // Emit the label.
    OutStreamer->emitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(
        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }
  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //   EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and
    // refer to it.
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->emitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand expression.
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);

    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                                .addReg(MI->getOperand(0).getReg())
                                .addReg(MI->getOperand(1).getReg())
                                .addExpr(DotExpr));
    return;
  }
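  // Generic pseudos (statepoints, patchpoints, stackmaps, XRay patchable
  // calls, etc.) each have a dedicated lowering helper.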
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::KCFI_CHECK:
    return LowerKCFI_CHECK(*MI);

  case X86::ASAN_CHECK_MEMACCESS:
    return LowerASAN_CHECK_MEMACCESS(*MI);

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
    return;
  case X86::SEH_PushReg:
  case X86::SEH_SaveReg:
  case X86::SEH_SaveXMM:
  case X86::SEH_StackAlloc:
  case X86::SEH_StackAlign:
  case X86::SEH_SetFrame:
  case X86::SEH_PushFrame:
  case X86::SEH_EndPrologue:
    EmitSEHInstruction(MI);
    return;
  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Pseudo instructions that aren't a call are assumed to not emit any
      // code. If they do, we worst case generate unnecessary noops after a
      // call.
      if (MBBI->isCall() || !MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }
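  // UBSAN_UD1 expands to a 'ud1' trap; the UBSan check code (operand 0) is
  // encoded as the displacement of the instruction's memory operand so a
  // trap handler can recover it from the faulting instruction.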
  case X86::UBSAN_UD1:
    EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
                                .addReg(X86::EAX)
                                .addReg(X86::EAX)
                                .addImm(1)
                                .addReg(X86::NoRegister)
                                .addImm(MI->getOperand(0).getImm())
                                .addReg(X86::NoRegister));
    return;
  case X86::CALL64pcrel32:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    break;
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);
  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns into
  // the stackmap shadow. The only way to achieve this is if the call is at
  // the end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call.
    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}