//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");
namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);
  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);
  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets, try to replace it with an equivalent ADD instruction.
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);
  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);
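  /// For illustration, one possible rewrite on such targets (a sketch; the
  /// exact replacement depends on the operands and the subtarget):
  ///   lea 0x10(%rbp,%rcx,2), %rdx
  /// =>
  ///   lea 0x10(,%rcx,2), %rdx ; add %rbp, %rdx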
  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;
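  /// e.g. (a sketch, assuming EFLAGS is dead at the LEA):
  ///   lea (%rax,%rbx), %rax  =>  add %rbx, %rax
  ///   lea 1(%rax), %rax      =>  inc %rax   (when INC/DEC is profitable)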
  /// Look for and transform the sequence
  ///     lea (reg1, reg2), reg3
  ///     sub reg3, reg4
  /// to
  ///     sub reg1, reg4
  ///     sub reg2, reg4
  /// It can also optimize the sequence lea/add similarly.
  bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
  /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
  /// the dest register of LEA instruction I.
  MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
                                            MachineBasicBlock &MBB) const;
  /// Check instructions between LeaI and AluI (exclusively).
  /// Set BaseIndexDef to true if base or index register from LeaI is defined.
  /// Set AluDestRef to true if the dest register of AluI is used or defined.
  /// *KilledBase is set to the killed base register usage.
  /// *KilledIndex is set to the killed index register usage.
  void checkRegUsage(MachineBasicBlock::iterator &LeaI,
                     MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
                     bool &AluDestRef, MachineOperand **KilledBase,
                     MachineOperand **KilledIndex) const;
  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);
  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return nullptr.
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;
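  /// e.g. a register-to-register move can become an address computation:
  ///   mov %rbx, %rax  =>  lea (%rbx), %rax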
public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) {}
  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;
  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace
char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    // Rewrite the move as `lea (src), dest`: base = Src, scale = 1,
    // no index, zero displacement, no segment.
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }
  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true
      return nullptr;
    }
    break;
  case X86::ADD64rr_DB:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  MachineFunction::iterator MFI = MBB.getIterator();
  return TII->convertToThreeAddress(MFI, MI, nullptr);
}
FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}
bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.LEAusesAG();
  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
    bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    }
    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}
FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
    MachineOperand &opnd = MI.getOperand(i);
    if (opnd.isReg() && opnd.getReg() == p.getReg()) {
      if (opnd.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}
/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    }
    return false;
  }
  --I;
  return true;
}
MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found;
  Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  return MachineBasicBlock::iterator();
}
static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}
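// (These are the base registers called out in the Intel manual quote above;
// rBP/R13 as a base also force a displacement byte into the encoding.)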
/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}
static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}
static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}
static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::SUB32rr;
  case X86::LEA64r:
    return X86::SUB64rr;
  }
}
static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  // Prefer the 8-bit immediate encoding when the displacement fits in a
  // signed byte.
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}
static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}
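// e.g. getINCDECFromLEA(X86::LEA64r, /*IsINC=*/true) yields X86::INC64r,
// letting `lea 1(%reg), %reg` become `inc %reg` in the rewrites below.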
MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                            MachineBasicBlock &MBB) const {
  const int InstrDistanceThreshold = 5;
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst = std::next(I);

  unsigned LEAOpcode = I->getOpcode();
  unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
  unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
  Register DestReg = I->getOperand(0).getReg();

  while (CurInst != MBB.end()) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      return MachineBasicBlock::iterator();
    if (InstrDistance > InstrDistanceThreshold)
      return MachineBasicBlock::iterator();

    // Check if the lea dest register is used in an add/sub instruction only.
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (!Opnd.isReg())
        continue;
      if (Opnd.getReg() == DestReg) {
        if (Opnd.isDef() || !Opnd.isKill())
          return MachineBasicBlock::iterator();

        unsigned AluOpcode = CurInst->getOpcode();
        if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
          return MachineBasicBlock::iterator();

        MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
        MachineOperand AluDest = CurInst->getOperand(0);
        if (Opnd2.getReg() != AluDest.getReg())
          return MachineBasicBlock::iterator();

        // X - (Y + Z) may generate different flags than (X - Y) - Z when
        // there is overflow. So we can't change the alu instruction if the
        // flags register is live.
        if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
          return MachineBasicBlock::iterator();

        return CurInst;
      }
      if (TRI->regsOverlap(DestReg, Opnd.getReg()))
        return MachineBasicBlock::iterator();
    }

    InstrDistance++;
    ++CurInst;
  }
  return MachineBasicBlock::iterator();
}
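// For example, searchALUInst accepts this pair (a sketch; EFLAGS must be
// dead after the SUB):
//   %rax = LEA64r %rbx, 1, %rcx, 0, $noreg
//   %rdx = SUB64rr %rdx, killed %rax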
void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
                                 MachineBasicBlock::iterator &AluI,
                                 bool &BaseIndexDef, bool &AluDestRef,
                                 MachineOperand **KilledBase,
                                 MachineOperand **KilledIndex) const {
  BaseIndexDef = AluDestRef = false;
  *KilledBase = *KilledIndex = nullptr;
  Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
  Register AluDestReg = AluI->getOperand(0).getReg();

  MachineBasicBlock::iterator CurInst = std::next(LeaI);
  while (CurInst != AluI) {
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (!Opnd.isReg())
        continue;
      Register Reg = Opnd.getReg();
      if (TRI->regsOverlap(Reg, AluDestReg))
        AluDestRef = true;
      if (TRI->regsOverlap(Reg, BaseReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledBase = &Opnd;
      }
      if (TRI->regsOverlap(Reg, IndexReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledIndex = &Opnd;
      }
    }
    ++CurInst;
  }
}
bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
                             MachineBasicBlock &MBB) const {
  // Look for an add/sub instruction which uses the result of lea.
  MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
  if (AluI == MachineBasicBlock::iterator())
    return false;

  // Check if there is any related register usage between the lea and the alu.
  bool BaseIndexDef, AluDestRef;
  MachineOperand *KilledBase, *KilledIndex;
  checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);

  MachineBasicBlock::iterator InsertPos = AluI;
  if (BaseIndexDef) {
    if (AluDestRef)
      return false;
    InsertPos = I;
    KilledBase = KilledIndex = nullptr;
  }
  // Check if there are same registers.
  Register AluDestReg = AluI->getOperand(0).getReg();
  Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
  if (I->getOpcode() == X86::LEA64_32r) {
    BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }
  if (AluDestReg == IndexReg) {
    if (BaseReg == IndexReg)
      return false;
    std::swap(BaseReg, IndexReg);
    std::swap(KilledBase, KilledIndex);
  }
  if (BaseReg == IndexReg)
    KilledBase = nullptr;
  // Now it's safe to change instructions.
  MachineInstr *NewMI1, *NewMI2;
  unsigned NewOpcode = AluI->getOpcode();
  NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
  NewMI1->addRegisterDead(X86::EFLAGS, TRI);
  NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
  NewMI2->addRegisterDead(X86::EFLAGS, TRI);

  // Clear the old Kill flags.
  if (KilledBase)
    KilledBase->setIsKill(false);
  if (KilledIndex)
    KilledIndex->setIsKill(false);

  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);

  MBB.erase(I);
  MBB.erase(AluI);
  I = NewMI1;
  return true;
}
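// Net effect of optLEAALU (a sketch):
//   lea (%rbx,%rcx), %rax        =>   sub %rbx, %rdx
//   sub %rax, %rdx                    sub %rcx, %rdx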
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
          MachineBasicBlock::LQR_Dead)
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;
  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;
  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // This is an LEA with only a base register and a displacement;
    // we can use ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    // lea  %reg, 1(%reg)
    // lea  %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm());
      }
    }
  } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
    // Look for and transform the sequence
    //     lea (reg1, reg2), reg3
    //     sub reg3, reg4
    return optLEAALU(I, MBB);
  } else
    return false;

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);

  MBB.erase(I);
  I = static_cast<MachineBasicBlock::iterator>(NewMI);
  return true;
}
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}
void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
      MBB.erase(MBI);
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}
void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
  if (Segment.getReg() != 0 || !Offset.isImm() ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead)
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make ADD instruction for two registers writing to LEA's destination
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make ADD instruction for immediate
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri =
        TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
  }
}
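// Net effect on a SlowLEA target (a sketch; requires scale 1, dead EFLAGS,
// and the LEA destination matching the base or index register):
//   lea 0x8(%rax,%rbx), %rax  =>  add %rbx, %rax ; add $0x8, %rax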
void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
  if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }
  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;
  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1.lea (%base,%index,1), %base => add %index,%base
  // 2.lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient, try switching the index and base operands;
    // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
    // lea offset(%base,%index,scale),%dst =>
    // lea (%base,%index,scale); add offset,%dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }

  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }
  // Handle the rest of the cases with inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
}
, 1);