//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//
16 #include "X86FrameLowering.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86Subtarget.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
25 #include "llvm/IR/EHPersonalities.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }
  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;
  bool runOnMachineFunction(MachineFunction &MF) override;
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }
  void ExpandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool ExpandMBB(MachineBasicBlock &MBB);
  /// This function expands pseudos which affect control flow.
  /// It is done in a separate pass to simplify block navigation in the main
  /// pass (calling ExpandMBB).
  bool ExpandPseudosWhichAffectControlFlow(MachineFunction &MF);
  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of XMM copy instructions,
  /// placed in a separate block and guarded by a check of the AL register
  /// (for the SystemV ABI).
  void ExpandVastartSaveXmmRegs(
      MachineBasicBlock *EntryBlk,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.
INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)
void X86ExpandPseudo::ExpandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();
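
  // Compare the selector against the jump table entry for 'Target':
  // materialize the entry's address (CombinedGlobal plus the per-target
  // offset stored in the pseudo) into R11, then CMP it with the selector.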
  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };
  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };
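
  // Emit a conditional jump to ThenMBB and continue emission in a fresh
  // block on the not-taken path.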
  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };
  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };
  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };
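
  // Recursively emit the branch "funnel": small ranges are handled with a
  // compare/branch ladder, larger ranges are split around a middle target so
  // the lookup forms a balanced binary search over the jump table entries.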
  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
    if (NumTargets == 1) {
      EmitTailCall(FirstTarget);
      return;
    }

    if (NumTargets == 2) {
      CmpTarget(FirstTarget + 1);
      EmitCondJumpTarget(X86::COND_B, FirstTarget);
      EmitTailCall(FirstTarget + 1);
      return;
    }

    if (NumTargets < 6) {
      CmpTarget(FirstTarget + 1);
      EmitCondJumpTarget(X86::COND_B, FirstTarget);
      EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
      EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
      return;
    }

    auto *ThenMBB = CreateMBB();
    CmpTarget(FirstTarget + (NumTargets / 2));
    EmitCondJump(X86::COND_B, ThenMBB);
    EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
    EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                     NumTargets - (NumTargets / 2) - 1);

    MF->insert(InsPt, ThenMBB);
    MBB = ThenMBB;
    MBBI = MBB->end();
    EmitBranchFunnel(FirstTarget, NumTargets / 2);
  };
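
  // The pseudo's operands are the selector, the combined jump table global,
  // and then one (offset, callee) pair per target, hence the operand-count
  // arithmetic below.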
  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}
void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand the CALL_RVMARKER pseudo to a call instruction, followed by the
  // special "movq %rax, %rdi" marker.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = 0;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }
  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function returns
  // the first argument, so the value of %rax is unchanged after the ObjC
  // runtime call. On Windows targets, the runtime call follows the regular
  // x64 calling convention and expects the first argument in %rcx.
  auto TargetReg = STI->getTargetTriple().isOSWindows() ? X86::RCX : X86::RDI;
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(TargetReg, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);
  // Emit call to ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();
  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}
/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
                                                         : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");
    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }
305 bool IsWin64
= STI
->isTargetWin64();
306 if (Opcode
== X86::TCRETURNdi
|| Opcode
== X86::TCRETURNdicc
||
307 Opcode
== X86::TCRETURNdi64
|| Opcode
== X86::TCRETURNdi64cc
) {
310 case X86::TCRETURNdi
:
313 case X86::TCRETURNdicc
:
314 Op
= X86::TAILJMPd_CC
;
316 case X86::TCRETURNdi64cc
:
317 assert(!MBB
.getParent()->hasWinCFI() &&
318 "Conditional tail calls confuse "
319 "the Win64 unwinder.");
320 Op
= X86::TAILJMPd64_CC
;
323 // Note: Win64 uses REX prefixes indirect jumps out of functions, but
325 Op
= X86::TAILJMPd64
;
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }
    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? (IsWin64 ? X86::TAILJMPm_REX : X86::TAILJMPm)
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }
    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    NewMI.setCFIType(*MBB.getParent(), MI.getCFIType());

    // Update the call site info.
    if (MBBI->isCandidateForCallSiteEntry())
      MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace pseudo with machine iret
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except for building kernel)
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one of
  // these needs a 2 byte displacement relative to the specified address (with
  // 32 bit spill size). The pairs of 1bit masks up to 16 bit masks all use the
  // same spill size, they all are stored using MASKPAIR16STORE, loaded using
  // MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // the code below.
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }

    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // EBX = InArg
    // actualmwaitx
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    ExpandICallBranchFunnel(&MBB, MBBI);
    return true;
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(X86::LDTILECFG));
    return true;
  }
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V: {
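    // Drop the row/col shape operands; the real tile loads take only the tile
    // register and memory operands, the shape coming from the tile
    // configuration.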
    for (unsigned i = 2; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc =
        Opcode == X86::PTILELOADDV ? X86::TILELOADD : X86::TILELOADDT1;
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV: {
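    // These AMX pseudos carry shape operands and keep the accumulator tied at
    // operand 4; untie it, drop the shape operands, and re-tie the accumulator
    // to the destination after switching to the real opcode.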
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTCMMIMFP16PSV:  Opc = X86::TCMMIMFP16PS; break;
    case X86::PTCMMRLFP16PSV:  Opc = X86::TCMMRLFP16PS; break;
    case X86::PTDPBSSDV:       Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUDV:       Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSDV:       Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUDV:       Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PSV:     Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PSV:     Opc = X86::TDPFP16PS; break;
    default: llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
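    // Remove the row/col shape operands (indices 1 and 0), then rewrite the
    // pseudo to the real TILESTORED.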
    for (int i = 1; i >= 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILESTORED));
    return true;
  }
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}
// This function creates an additional block for storing varargs guarded
// registers. It adds a check of %al to the entry block, so that the new
// GuardedRegsBlk is skipped when the XMM registers should not be stored.
//
//     EntryBlk[VAStartPseudoInstr]          EntryBlk
//                                              |    \
//                                     =>       |  GuardedRegsBlk
//                                              |    /
//                                            TailBlk
void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);
  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();
  // Calculate liveins for newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;

    LiveRegs.stepForward(MI, Clobbers);
  }
  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);
  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);
  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(VAStartPseudoInstr->getOperand(OpndIdx).getReg().isPhysical());
  }
  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);

  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }
  // Add liveins to the created block.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}
/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= ExpandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}
bool X86ExpandPseudo::ExpandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS, and it is located in the entry block,
  // so we do not need to scan other blocks.
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      ExpandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }

  return false;
}
bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = ExpandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= ExpandMBB(MBB);
  return Modified;
}
/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}