//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"
namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }
  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;
  bool runOnMachineFunction(MachineFunction &MF) override;
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }
  void ExpandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool ExpandMBB(MachineBasicBlock &MBB);
  /// This function expands pseudos which affect control flow.
  /// It is done as a separate pass to simplify block navigation in the main
  /// pass (which calls ExpandMBB).
  bool ExpandPseudosWhichAffectControlFlow(MachineFunction &MF);
  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of XMM copying instructions,
  /// placed into a separate block guarded by a check of the %al register (for
  /// the SystemV ABI).
  void ExpandVastartSaveXmmRegs(
      MachineBasicBlock *EntryBlk,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.
INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)
void X86ExpandPseudo::ExpandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;
  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();
  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };
  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };
  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };
  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };
  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };
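
  // EmitBranchFunnel recursively bisects the target range: CmpTarget loads the
  // address of a jump-table slot into R11 and compares the selector against
  // it, conditional jumps steer below/equal/above the pivot, and once a single
  // target remains a plain tail jump is emitted.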
  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
        if (NumTargets == 1) {
          EmitTailCall(FirstTarget);
          return;
        }

        if (NumTargets == 2) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitTailCall(FirstTarget + 1);
          return;
        }

        if (NumTargets < 6) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
          EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
          return;
        }

        auto *ThenMBB = CreateMBB();
        CmpTarget(FirstTarget + (NumTargets / 2));
        EmitCondJump(X86::COND_B, ThenMBB);
        EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
        EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                         NumTargets - (NumTargets / 2) - 1);

        MF->insert(InsPt, ThenMBB);
        MBB = ThenMBB;
        MBBI = MBB->end();
        EmitBranchFunnel(FirstTarget, NumTargets / 2);
      };
  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);

  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}
void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to call instruction, followed by the special
  // "movq %rax, %rdi" marker.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
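
  // The resulting sequence looks roughly like (non-Windows case; the runtime
  // callee comes from operand 0 of the pseudo):
  //   callq <original callee>
  //   movq  %rax, %rdi            ; marker recognized by the ObjC runtime
  //   callq <runtime function>    ; e.g. objc_retainAutoreleasedReturnValue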
  unsigned Opc;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }
  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function returns
  // the first argument, so the value of %rax is unchanged after the ObjC
  // runtime call. On Windows targets, the runtime call follows the regular
  // x64 calling convention and expects the first argument in %rcx.
  auto TargetReg = STI->getTargetTriple().isOSWindows() ? X86::RCX : X86::RDI;
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(TargetReg, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);
  // Emit call to ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();
  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}
/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
  bool HasEGPR = STI->hasEGPR();

  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
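    // A TCRETURN* pseudo carries the jump target plus an immediate stack
    // adjustment. The expansion below releases that stack space (merging with
    // a preceding SP update when possible) and then emits the matching
    // TAILJMP* instruction.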
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
                                                         : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");
    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");
    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    } else if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }
    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse "
               "the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes on indirect jumps out of functions,
        // but not on direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }
    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }
    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    NewMI.setCFIType(*MBB.getParent(), MI.getCFIType());

    // Update the call site info.
    if (MBBI->isCandidateForCallSiteEntry())
      MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);
    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace pseudo with machine iret.
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except when building the kernel).
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one of
  // these needs a 2-byte displacement relative to the specified address (with
  // 32-bit spill size). The pairs of 1-bit masks up to 16-bit masks all use
  // the same spill size; they are all stored using MASKPAIR16STORE and loaded
  // using MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
  case X86::MASKPAIR16LOAD: {
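    // For example, with a 32-bit spill slot at displacement 16 the reload
    // expands to roughly (mask registers are illustrative):
    //   kmovw 16(%base), %k0   ; low 16 bits of the pair
    //   kmovw 18(%base), %k1   ; high 16 bits, at disp + 2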
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL,
                TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL,
                TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }
    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
    auto MIBLo = BuildMI(MBB, MBBI, DL,
                         TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
    auto MIBHi = BuildMI(MBB, MBBI, DL,
                         TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));
    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualmwaitx
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    ExpandICallBranchFunnel(&MBB, MBBI);
    return true;
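  // The AMX cases below lower the P*V pseudos onto real tile instructions,
  // dropping their row/column shape operands; the helper macro selects the
  // EVEX-encoded opcode variant whenever the subtarget has extended GPRs
  // (STI->hasEGPR()).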
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
    return true;
  }
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V: {
    for (unsigned i = 2; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc = Opcode == X86::PTILELOADDV
                       ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
                       : GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV: {
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTCMMIMFP16PSV: Opc = X86::TCMMIMFP16PS; break;
    case X86::PTCMMRLFP16PSV: Opc = X86::TCMMRLFP16PS; break;
    case X86::PTDPBSSDV: Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUDV: Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSDV: Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUDV: Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break;
    default: llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
    for (int i = 1; i >= 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
    return true;
  }
#undef GET_EGPR_IF_ENABLED
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}
// This function creates an additional block for storing varargs guarded
// registers. It adds a check of %al to the entry block, to skip
// GuardedRegsBlk if XMM registers should not be stored.
//
//   EntryBlk[VAStartPseudoInstr]        EntryBlk
//        |                                 |  \
//        |                                 |   GuardedRegsBlk
//        |                        =>       |  /
//        |                              TailBlk
//        |                                 |
//
void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);

  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();
  // Calculate liveins for newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;

    LiveRegs.stepForward(MI, Clobbers);
  }
  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);
  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);
  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
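
  // Each iteration of the loop below emits one aligned 16-byte store, roughly:
  //   (v)movaps %xmm<RegIdx>, (FrameOffset + VarArgsRegsOffset + RegIdx*16)(%base)
  // where the remaining address operands are copied from the pseudo.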
  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(VAStartPseudoInstr->getOperand(OpndIdx).getReg().isPhysical());
  }
  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);
  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }
  // Add liveins to the created block.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}
/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= ExpandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}
bool X86ExpandPseudo::ExpandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS, which is located in the entry block,
  // so we do not need to evaluate other blocks.
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      ExpandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }

  return false;
}
bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = ExpandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= ExpandMBB(MBB);
  return Modified;
}
/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}