1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the pass that finds instructions that can be
10 // re-written as LEA instructions in order to reduce pipeline delays.
11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
13 //===----------------------------------------------------------------------===//
16 #include "X86InstrInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/ProfileSummaryInfo.h"
20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineSizeOpts.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
// FIXUPLEA_DESC is the string returned by getPassName(); FIXUPLEA_NAME is the
// name handed to INITIALIZE_PASS and is reused as this file's DEBUG_TYPE.
30 #define FIXUPLEA_DESC "X86 LEA Fixup"
31 #define FIXUPLEA_NAME "x86-fixup-LEAs"
33 #define DEBUG_TYPE FIXUPLEA_NAME
// Counter declared through STATISTIC with the description string below.
35 STATISTIC(NumLEAs
, "Number of LEA instructions created");
/// Machine-function pass that runs after register allocation and either
/// rewrites instructions as LEAs or replaces LEAs with ADD/INC/DEC, as
/// described member by member below.
// NOTE(review): this declaration is truncated in this view (access
// specifiers, the declaration of ID, and several closing braces are not
// visible) — consult the full file before editing.
38 class FixupLEAPass
: public MachineFunctionPass
{
/// How an instruction touches a given register, as reported by
/// usesRegister().
39 enum RegUsageState
{ RU_NotUsed
, RU_Write
, RU_Read
};
41 /// Given a machine register, look for the instruction
42 /// which writes it in the current basic block. If found,
43 /// try to replace it with an equivalent LEA instruction.
44 /// If replacement succeeds, then also process the newly created instruction.
46 void seekLEAFixup(MachineOperand
&p
, MachineBasicBlock::iterator
&I
,
47 MachineBasicBlock
&MBB
);
49 /// Given a memory access or LEA instruction
50 /// whose address mode uses a base and/or index register, look for
51 /// an opportunity to replace the instruction which sets the base or index
52 /// register with an equivalent LEA instruction.
53 void processInstruction(MachineBasicBlock::iterator
&I
,
54 MachineBasicBlock
&MBB
);
56 /// Given a LEA instruction which is unprofitable
57 /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58 void processInstructionForSlowLEA(MachineBasicBlock::iterator
&I
,
59 MachineBasicBlock
&MBB
);
61 /// Given a LEA instruction which is unprofitable
62 /// on SNB+ try to replace it with other instructions.
63 /// According to Intel's Optimization Reference Manual:
64 /// " For LEA instructions with three source operands and some specific
65 /// situations, instruction latency has increased to 3 cycles, and must
66 /// dispatch via port 1:
67 /// - LEA that has all three source operands: base, index, and offset
68 /// - LEA that uses base and index registers where the base is EBP, RBP, or R13
70 /// - LEA that uses RIP relative addressing mode
71 /// - LEA that uses 16-bit addressing mode "
72 /// This function currently handles the first 2 cases only.
73 void processInstrForSlow3OpLEA(MachineBasicBlock::iterator
&I
,
74 MachineBasicBlock
&MBB
, bool OptIncDec
);
76 /// Look for LEAs that are really two address LEAs that we might be able to
77 /// turn into regular ADD instructions.
78 bool optTwoAddrLEA(MachineBasicBlock::iterator
&I
,
79 MachineBasicBlock
&MBB
, bool OptIncDec
,
80 bool UseLEAForSP
) const;
82 /// Look for and transform the sequence
83 ///   lea (reg1, reg2), reg3
///   sub reg3, reg4
/// to
///   sub reg1, reg4
///   sub reg2, reg4
88 /// It can also optimize the sequence lea/add similarly.
89 bool optLEAALU(MachineBasicBlock::iterator
&I
, MachineBasicBlock
&MBB
) const;
91 /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92 /// the dest register of LEA instruction I.
93 MachineBasicBlock::iterator
searchALUInst(MachineBasicBlock::iterator
&I
,
94 MachineBasicBlock
&MBB
) const;
96 /// Check instructions between LeaI and AluI (exclusively).
97 /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98 /// Set AluDestRef to true if the dest register of AluI is used or defined.
99 /// *KilledBase is set to the killed base register usage.
100 /// *KilledIndex is set to the killed index register usage.
101 void checkRegUsage(MachineBasicBlock::iterator
&LeaI
,
102 MachineBasicBlock::iterator
&AluI
, bool &BaseIndexDef
,
103 bool &AluDestRef
, MachineOperand
**KilledBase
,
104 MachineOperand
**KilledIndex
) const;
106 /// Determine if an instruction references a machine register
107 /// and, if so, whether it reads or writes the register.
108 RegUsageState
usesRegister(MachineOperand
&p
, MachineBasicBlock::iterator I
);
110 /// Step backwards through a basic block, looking
111 /// for an instruction which writes a register within
112 /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113 MachineBasicBlock::iterator
searchBackwards(MachineOperand
&p
,
114 MachineBasicBlock::iterator
&I
,
115 MachineBasicBlock
&MBB
);
117 /// If an instruction can be converted to an
118 /// equivalent LEA, insert the new instruction into the basic block
119 /// and return a pointer to it. Otherwise, return zero.
120 MachineInstr
*postRAConvertToLEA(MachineBasicBlock
&MBB
,
121 MachineBasicBlock::iterator
&MBBI
) const;
/// Returns the pass description string FIXUPLEA_DESC.
126 StringRef
getPassName() const override
{ return FIXUPLEA_DESC
; }
/// Constructs the pass, forwarding ID to the MachineFunctionPass base.
128 FixupLEAPass() : MachineFunctionPass(ID
) { }
130 /// Loop over all of the basic blocks,
131 /// replacing instructions by equivalent LEA instructions
132 /// if needed and when possible.
133 bool runOnMachineFunction(MachineFunction
&MF
) override
;
135 // This pass runs after regalloc and doesn't support VReg operands.
136 MachineFunctionProperties
getRequiredProperties() const override
{
137 return MachineFunctionProperties().set(
138 MachineFunctionProperties::Property::NoVRegs
);
/// Declares the analyses this pass requires (ProfileSummaryInfoWrapperPass
/// and LazyMachineBlockFrequencyInfoPass) and then defers to the base class.
141 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
142 AU
.addRequired
<ProfileSummaryInfoWrapperPass
>();
143 AU
.addRequired
<LazyMachineBlockFrequencyInfoPass
>();
144 MachineFunctionPass::getAnalysisUsage(AU
);
// Scheduling model queried for instruction latencies in searchBackwards().
148 TargetSchedModel TSM
;
// Target instruction / register info; both are assigned from the subtarget
// at the start of runOnMachineFunction().
149 const X86InstrInfo
*TII
= nullptr;
150 const X86RegisterInfo
*TRI
= nullptr;
// Out-of-class definition of the pass ID that the FixupLEAPass constructor
// forwards to MachineFunctionPass.
154 char FixupLEAPass::ID
= 0;
// Registers the pass under FIXUPLEA_NAME with description FIXUPLEA_DESC.
156 INITIALIZE_PASS(FixupLEAPass
, FIXUPLEA_NAME
, FIXUPLEA_DESC
, false, false)
// Attempts to produce an LEA equivalent of the instruction at MBBI.
// What is visible in this excerpt:
//  - a first switch on the opcode binds Src/Dest to operands 1/0 and starts a
//    BuildMI at MBBI whose opcode is LEA32r when the original is MOV32rr;
//  - instructions for which isConvertibleTo3Addr() is false are filtered;
//  - a second switch lists the ADD variants considered safe and the function
//    ends by delegating to TII->convertToThreeAddress().
// NOTE(review): the return-type line, the case labels of the first switch,
// the remainder of the BuildMI chain and every early return are missing from
// this view — confirm against the full file before editing.
159 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock
&MBB
,
160 MachineBasicBlock::iterator
&MBBI
) const {
161 MachineInstr
&MI
= *MBBI
;
162 switch (MI
.getOpcode()) {
165 const MachineOperand
&Src
= MI
.getOperand(1);
166 const MachineOperand
&Dest
= MI
.getOperand(0);
167 MachineInstr
*NewMI
=
168 BuildMI(MBB
, MBBI
, MI
.getDebugLoc(),
169 TII
->get(MI
.getOpcode() == X86::MOV32rr
? X86::LEA32r
181 if (!MI
.isConvertibleTo3Addr())
184 switch (MI
.getOpcode()) {
186 // Only convert instructions that we've verified are safe.
189 case X86::ADD64ri32_DB
:
191 case X86::ADD32ri_DB
:
192 if (!MI
.getOperand(2).isImm()) {
193 // convertToThreeAddress will call getImm()
194 // which requires isImm() to be true
205 case X86::ADD64rr_DB
:
207 case X86::ADD32rr_DB
:
208 // These instructions are all fine to convert.
211 return TII
->convertToThreeAddress(MI
, nullptr, nullptr);
// Factory function: returns a newly allocated FixupLEAPass.
214 FunctionPass
*llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
// Returns true when Opcode is one of the LEA opcodes this pass handles:
// LEA32r, LEA64r or LEA64_32r.
216 static bool isLEA(unsigned Opcode
) {
217 return Opcode
== X86::LEA32r
|| Opcode
== X86::LEA64r
||
218 Opcode
== X86::LEA64_32r
;
// Pass entry point.
//  1. Checks skipFunction() for the function.
//  2. Reads the subtarget tuning flags (slowLEA, slow3OpsLEA, leaUsesAG,
//     slowIncDec, useLeaForSP) and caches TII / TRI.
//  3. For every basic block, a first loop visits the LEA instructions and
//     tries optTwoAddrLEA, then the slow-LEA or slow-3-operand-LEA rewrites;
//     a second loop calls processInstruction() on every instruction.
// NOTE(review): the early return, the `continue` statements, the guards
// around the slow-LEA call and the second loop, and the final return are not
// visible in this excerpt. LEAUsesAG is read here but its use is not shown —
// presumably it gates the second loop; confirm.
221 bool FixupLEAPass::runOnMachineFunction(MachineFunction
&MF
) {
222 if (skipFunction(MF
.getFunction()))
225 const X86Subtarget
&ST
= MF
.getSubtarget
<X86Subtarget
>();
226 bool IsSlowLEA
= ST
.slowLEA();
227 bool IsSlow3OpsLEA
= ST
.slow3OpsLEA();
228 bool LEAUsesAG
= ST
.leaUsesAG();
// INC/DEC may be used when they are not slow on this subtarget, or when the
// function is marked as optimize-for-size.
230 bool OptIncDec
= !ST
.slowIncDec() || MF
.getFunction().hasOptSize();
231 bool UseLEAForSP
= ST
.useLeaForSP();
234 TII
= ST
.getInstrInfo();
235 TRI
= ST
.getRegisterInfo();
// Block frequency info is only requested when a profile summary exists.
236 auto *PSI
= &getAnalysis
<ProfileSummaryInfoWrapperPass
>().getPSI();
237 auto *MBFI
= (PSI
&& PSI
->hasProfileSummary())
238 ? &getAnalysis
<LazyMachineBlockFrequencyInfoPass
>().getBFI()
241 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
242 for (MachineBasicBlock
&MBB
: MF
) {
243 // First pass. Try to remove or optimize existing LEAs.
// Per-block refinement: also allow INC/DEC when this block should be
// optimized for size according to the profile data.
244 bool OptIncDecPerBB
=
245 OptIncDec
|| llvm::shouldOptimizeForSize(&MBB
, PSI
, MBFI
);
246 for (MachineBasicBlock::iterator I
= MBB
.begin(); I
!= MBB
.end(); ++I
) {
247 if (!isLEA(I
->getOpcode()))
250 if (optTwoAddrLEA(I
, MBB
, OptIncDecPerBB
, UseLEAForSP
))
254 processInstructionForSlowLEA(I
, MBB
);
255 else if (IsSlow3OpsLEA
)
256 processInstrForSlow3OpLEA(I
, MBB
, OptIncDecPerBB
);
259 // Second pass for creating LEAs. This may reverse some of the
260 // transformations above.
262 for (MachineBasicBlock::iterator I
= MBB
.begin(); I
!= MBB
.end(); ++I
)
263 processInstruction(I
, MBB
);
267 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
// Classifies how instruction I touches the register held by operand p.
// The result starts as RU_NotUsed and every operand of I is compared against
// p.getReg().
// NOTE(review): the statements that set RU_Write / RU_Read and the return are
// missing from this view.
272 FixupLEAPass::RegUsageState
273 FixupLEAPass::usesRegister(MachineOperand
&p
, MachineBasicBlock::iterator I
) {
274 RegUsageState RegUsage
= RU_NotUsed
;
275 MachineInstr
&MI
= *I
;
277 for (const MachineOperand
&MO
: MI
.operands()) {
278 if (MO
.isReg() && MO
.getReg() == p
.getReg()) {
287 /// getPreviousInstr - Given an iterator to an instruction in a basic
288 /// block, move to the previous instruction in the block,
289 /// wrapping around to the last instruction of the block if the block
290 /// branches to itself.
/// The iterator is taken by reference and the result is a bool, which
/// searchBackwards() stores in `Found`.
// NOTE(review): presumably I is updated in place and false means there is no
// previous instruction; the statements that do this are not visible here.
291 static inline bool getPreviousInstr(MachineBasicBlock::iterator
&I
,
292 MachineBasicBlock
&MBB
) {
293 if (I
== MBB
.begin()) {
// At the start of the block: only a block that is its own predecessor
// is handled specially.
294 if (MBB
.isPredecessor(&MBB
)) {
// Walks backwards from I via getPreviousInstr(), accumulating instruction
// latency from TSM, looking for an instruction for which usesRegister(p, ...)
// reports RU_Write. The walk stops when getPreviousInstr() fails, when it
// comes back round to I, or once the accumulated distance exceeds
// INSTR_DISTANCE_THRESHOLD. Calls and inline asm are tested explicitly.
// Falls through to return a default-constructed iterator.
// NOTE(review): the initialisation of CurInst, the declaration of Found, the
// action taken on call/inline asm and the successful return are missing from
// this view.
304 MachineBasicBlock::iterator
305 FixupLEAPass::searchBackwards(MachineOperand
&p
, MachineBasicBlock::iterator
&I
,
306 MachineBasicBlock
&MBB
) {
307 int InstrDistance
= 1;
308 MachineBasicBlock::iterator CurInst
;
309 static const int INSTR_DISTANCE_THRESHOLD
= 5;
313 Found
= getPreviousInstr(CurInst
, MBB
);
314 while (Found
&& I
!= CurInst
) {
315 if (CurInst
->isCall() || CurInst
->isInlineAsm())
317 if (InstrDistance
> INSTR_DISTANCE_THRESHOLD
)
318 break; // too far back to make a difference
319 if (usesRegister(p
, CurInst
) == RU_Write
) {
// Distance is measured in latency cycles of the instructions stepped over.
322 InstrDistance
+= TSM
.computeInstrLatency(&*CurInst
);
323 Found
= getPreviousInstr(CurInst
, MBB
);
325 return MachineBasicBlock::iterator();
// Returns true when Reg is EBP, RBP, R13D or R13 — the registers this file
// treats as inefficient LEA base registers.
328 static inline bool isInefficientLEAReg(unsigned Reg
) {
329 return Reg
== X86::EBP
|| Reg
== X86::RBP
||
330 Reg
== X86::R13D
|| Reg
== X86::R13
;
333 /// Returns true if this LEA uses base and index registers, and the base
334 /// register is known to be inefficient for the subtarget.
/// Concretely: Base is a register accepted by isInefficientLEAReg() and Index
/// is a register other than X86::NoRegister.
335 // TODO: use a variant scheduling class to model the latency profile
336 // of LEA instructions, and implement this logic as a scheduling predicate.
337 static inline bool hasInefficientLEABaseReg(const MachineOperand
&Base
,
338 const MachineOperand
&Index
) {
339 return Base
.isReg() && isInefficientLEAReg(Base
.getReg()) && Index
.isReg() &&
340 Index
.getReg() != X86::NoRegister
;
// Returns true when the displacement operand is a non-zero immediate, a
// global address, or a block address.
343 static inline bool hasLEAOffset(const MachineOperand
&Offset
) {
344 return (Offset
.isImm() && Offset
.getImm() != 0) || Offset
.isGlobal() ||
345 Offset
.isBlockAddress();
// Maps an LEA opcode to an opcode value; by its name and its callers, the
// matching register-register ADD. Unknown opcodes hit llvm_unreachable.
// NOTE(review): the switch and its case bodies are missing from this view.
348 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode
) {
351 llvm_unreachable("Unexpected LEA instruction");
// Maps an LEA opcode to an opcode value; by its name and its use in
// searchALUInst(), the matching register-register SUB. Unknown opcodes hit
// llvm_unreachable.
// NOTE(review): the switch and its case bodies are missing from this view.
360 static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode
) {
363 llvm_unreachable("Unexpected LEA instruction");
// Maps an LEA opcode (and its displacement operand) to an ADD-immediate
// opcode; ADD64ri32 is the only result visible here. Unknown opcodes hit
// llvm_unreachable.
// NOTE(review): the switch, the other return values and any use of Offset are
// missing from this view.
372 static inline unsigned getADDriFromLEA(unsigned LEAOpcode
,
373 const MachineOperand
&Offset
) {
376 llvm_unreachable("Unexpected LEA instruction");
381 return X86::ADD64ri32
;
// Maps an LEA opcode to INC (IsINC true) or DEC (IsINC false): the visible
// results are INC32r/DEC32r and INC64r/DEC64r. Unknown opcodes hit
// llvm_unreachable.
// NOTE(review): the case labels selecting between the 32- and 64-bit forms
// are missing from this view.
385 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode
, bool IsINC
) {
388 llvm_unreachable("Unexpected LEA instruction");
391 return IsINC
? X86::INC32r
: X86::DEC32r
;
393 return IsINC
? X86::INC64r
: X86::DEC64r
;
// Scans forward from the instruction after LEA I, up to the end of MBB,
// looking for an ADD/SUB (opcodes from getADDrrFromLEA / getSUBrrFromLEA)
// that uses the LEA's destination register. A use of DestReg is rejected
// (default-constructed iterator returned) when:
//  - the operand is a def or is not a kill,
//  - the instruction is neither the matching ADD nor SUB,
//  - the other source operand (index 3 - I) is not the ALU destination, or
//  - the instruction's EFLAGS def is not dead.
// Calls, inline asm and a distance threshold of 5 are also tested.
// NOTE(review): the actions on call/inline asm and on the threshold, the
// isReg() guard, the successful return and the distance increment are missing
// from this view.
397 MachineBasicBlock::iterator
398 FixupLEAPass::searchALUInst(MachineBasicBlock::iterator
&I
,
399 MachineBasicBlock
&MBB
) const {
400 const int InstrDistanceThreshold
= 5;
401 int InstrDistance
= 1;
402 MachineBasicBlock::iterator CurInst
= std::next(I
);
404 unsigned LEAOpcode
= I
->getOpcode();
405 unsigned AddOpcode
= getADDrrFromLEA(LEAOpcode
);
406 unsigned SubOpcode
= getSUBrrFromLEA(LEAOpcode
);
407 Register DestReg
= I
->getOperand(0).getReg();
409 while (CurInst
!= MBB
.end()) {
410 if (CurInst
->isCall() || CurInst
->isInlineAsm())
412 if (InstrDistance
> InstrDistanceThreshold
)
415 // Check if the lea dest register is used in an add/sub instruction only.
// Note: the loop index below is also named I and shadows the iterator
// parameter I for the duration of the loop.
416 for (unsigned I
= 0, E
= CurInst
->getNumOperands(); I
!= E
; ++I
) {
417 MachineOperand
&Opnd
= CurInst
->getOperand(I
);
419 if (Opnd
.getReg() == DestReg
) {
420 if (Opnd
.isDef() || !Opnd
.isKill())
421 return MachineBasicBlock::iterator();
423 unsigned AluOpcode
= CurInst
->getOpcode();
424 if (AluOpcode
!= AddOpcode
&& AluOpcode
!= SubOpcode
)
425 return MachineBasicBlock::iterator();
// 3 - I selects the other source operand when I is 1 or 2.
427 MachineOperand
&Opnd2
= CurInst
->getOperand(3 - I
);
428 MachineOperand AluDest
= CurInst
->getOperand(0);
429 if (Opnd2
.getReg() != AluDest
.getReg())
430 return MachineBasicBlock::iterator();
432 // X - (Y + Z) may generate different flags than (X - Y) - Z when
433 // there is overflow. So we can't change the alu instruction if the
434 // flags register is live.
435 if (!CurInst
->registerDefIsDead(X86::EFLAGS
, TRI
))
436 return MachineBasicBlock::iterator();
// Any other operand overlapping DestReg (e.g. a sub- or super-register)
// also aborts the search.
440 if (TRI
->regsOverlap(DestReg
, Opnd
.getReg()))
441 return MachineBasicBlock::iterator();
448 return MachineBasicBlock::iterator();
// Inspects every operand of every instruction strictly between LeaI and AluI.
// Outputs are reset first (both flags false, both pointers null). Each
// operand register is then tested with regsOverlap() against the ALU
// destination, the LEA base and the LEA index; a killing operand that
// overlaps the index is recorded in *KilledIndex.
// NOTE(review): the isReg() guard and the statements that set AluDestRef,
// BaseIndexDef and *KilledBase are missing from this view — presumably they
// mirror the visible *KilledIndex assignment; confirm.
451 void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator
&LeaI
,
452 MachineBasicBlock::iterator
&AluI
,
453 bool &BaseIndexDef
, bool &AluDestRef
,
454 MachineOperand
**KilledBase
,
455 MachineOperand
**KilledIndex
) const {
456 BaseIndexDef
= AluDestRef
= false;
457 *KilledBase
= *KilledIndex
= nullptr;
458 Register BaseReg
= LeaI
->getOperand(1 + X86::AddrBaseReg
).getReg();
459 Register IndexReg
= LeaI
->getOperand(1 + X86::AddrIndexReg
).getReg();
460 Register AluDestReg
= AluI
->getOperand(0).getReg();
462 for (MachineInstr
&CurInst
: llvm::make_range(std::next(LeaI
), AluI
)) {
463 for (MachineOperand
&Opnd
: CurInst
.operands()) {
466 Register Reg
= Opnd
.getReg();
467 if (TRI
->regsOverlap(Reg
, AluDestReg
))
469 if (TRI
->regsOverlap(Reg
, BaseReg
)) {
472 else if (Opnd
.isKill())
475 if (TRI
->regsOverlap(Reg
, IndexReg
)) {
478 else if (Opnd
.isKill())
479 *KilledIndex
= &Opnd
;
// Splits an ALU instruction that consumes the result of LEA I into two ALU
// instructions with the same opcode: one applying the LEA's base register to
// the ALU destination and one applying its index register.
// Steps visible here:
//  1. searchALUInst() locates the ALU instruction; checkRegUsage() reports
//     intervening defs/uses and killed base/index operands.
//  2. For LEA64_32r the base/index are narrowed to their 32-bit sub-registers.
//  3. If the ALU destination equals the index register, base and index (and
//     their kill records) are swapped; a base equal to the index is handled.
//  4. Two new instructions are built at InsertPos, each marked as having a
//     dead EFLAGS def, old kill flags are cleared, and debug values are
//     redirected from the ALU instruction to the second new instruction.
// NOTE(review): all early returns, the logic that moves InsertPos, the null
// checks before setIsKill(), the erasure of the old instructions and the
// final return are missing from this view.
485 bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator
&I
,
486 MachineBasicBlock
&MBB
) const {
487 // Look for an add/sub instruction which uses the result of lea.
488 MachineBasicBlock::iterator AluI
= searchALUInst(I
, MBB
);
489 if (AluI
== MachineBasicBlock::iterator())
492 // Check if there are any related register usage between lea and alu.
493 bool BaseIndexDef
, AluDestRef
;
494 MachineOperand
*KilledBase
, *KilledIndex
;
495 checkRegUsage(I
, AluI
, BaseIndexDef
, AluDestRef
, &KilledBase
, &KilledIndex
);
497 MachineBasicBlock::iterator InsertPos
= AluI
;
502 KilledBase
= KilledIndex
= nullptr;
505 // Check if there are same registers.
506 Register AluDestReg
= AluI
->getOperand(0).getReg();
507 Register BaseReg
= I
->getOperand(1 + X86::AddrBaseReg
).getReg();
508 Register IndexReg
= I
->getOperand(1 + X86::AddrIndexReg
).getReg();
509 if (I
->getOpcode() == X86::LEA64_32r
) {
510 BaseReg
= TRI
->getSubReg(BaseReg
, X86::sub_32bit
);
511 IndexReg
= TRI
->getSubReg(IndexReg
, X86::sub_32bit
);
513 if (AluDestReg
== IndexReg
) {
514 if (BaseReg
== IndexReg
)
516 std::swap(BaseReg
, IndexReg
);
517 std::swap(KilledBase
, KilledIndex
);
// When base and index are the same register it is still needed by the
// second new instruction, so the first must not carry a kill flag.
519 if (BaseReg
== IndexReg
)
520 KilledBase
= nullptr;
522 // Now it's safe to change instructions.
523 MachineInstr
*NewMI1
, *NewMI2
;
524 unsigned NewOpcode
= AluI
->getOpcode();
525 NewMI1
= BuildMI(MBB
, InsertPos
, AluI
->getDebugLoc(), TII
->get(NewOpcode
),
527 .addReg(AluDestReg
, RegState::Kill
)
528 .addReg(BaseReg
, KilledBase
? RegState::Kill
: 0);
529 NewMI1
->addRegisterDead(X86::EFLAGS
, TRI
);
530 NewMI2
= BuildMI(MBB
, InsertPos
, AluI
->getDebugLoc(), TII
->get(NewOpcode
),
532 .addReg(AluDestReg
, RegState::Kill
)
533 .addReg(IndexReg
, KilledIndex
? RegState::Kill
: 0);
534 NewMI2
->addRegisterDead(X86::EFLAGS
, TRI
);
536 // Clear the old Kill flags.
538 KilledBase
->setIsKill(false);
540 KilledIndex
->setIsKill(false);
542 MBB
.getParent()->substituteDebugValuesForInst(*AluI
, *NewMI2
, 1);
// Tries to replace LEA I with a two-address ADD, INC or DEC.
// Preconditions tested up front: no segment register, an immediate
// displacement, a scale of at most 1, and EFLAGS dead at I. Stack-pointer
// destinations are also tested when UseLEAForSP is set.
// Cases handled:
//  - base + index, zero displacement, destination equal to base or index:
//    register-register ADD;
//  - destination equal to base, no index: INC/DEC when OptIncDec is set and
//    the displacement is +1/-1, otherwise ADD with an immediate;
//  - base + index, zero displacement, distinct destination: delegated to
//    optLEAALU().
// For LEA64_32r the 32-bit sub-registers are used and the original 64-bit
// registers are added as implicit operands.
// NOTE(review): the early returns, several else branches, the erasure of the
// old instruction and the final return are missing from this view.
549 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator
&I
,
550 MachineBasicBlock
&MBB
, bool OptIncDec
,
551 bool UseLEAForSP
) const {
552 MachineInstr
&MI
= *I
;
554 const MachineOperand
&Base
= MI
.getOperand(1 + X86::AddrBaseReg
);
555 const MachineOperand
&Scale
= MI
.getOperand(1 + X86::AddrScaleAmt
);
556 const MachineOperand
&Index
= MI
.getOperand(1 + X86::AddrIndexReg
);
557 const MachineOperand
&Disp
= MI
.getOperand(1 + X86::AddrDisp
);
558 const MachineOperand
&Segment
= MI
.getOperand(1 + X86::AddrSegmentReg
);
560 if (Segment
.getReg() != 0 || !Disp
.isImm() || Scale
.getImm() > 1 ||
561 MBB
.computeRegisterLiveness(TRI
, X86::EFLAGS
, I
) !=
562 MachineBasicBlock::LQR_Dead
)
565 Register DestReg
= MI
.getOperand(0).getReg();
566 Register BaseReg
= Base
.getReg();
567 Register IndexReg
= Index
.getReg();
569 // Don't change stack adjustment LEAs.
570 if (UseLEAForSP
&& (DestReg
== X86::ESP
|| DestReg
== X86::RSP
))
573 // LEA64_32 has 64-bit operands but 32-bit result.
574 if (MI
.getOpcode() == X86::LEA64_32r
) {
576 BaseReg
= TRI
->getSubReg(BaseReg
, X86::sub_32bit
);
578 IndexReg
= TRI
->getSubReg(IndexReg
, X86::sub_32bit
);
581 MachineInstr
*NewMI
= nullptr;
584 // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
585 // which can be turned into add %reg2, %reg1
586 if (BaseReg
!= 0 && IndexReg
!= 0 && Disp
.getImm() == 0 &&
587 (DestReg
== BaseReg
|| DestReg
== IndexReg
)) {
588 unsigned NewOpcode
= getADDrrFromLEA(MI
.getOpcode());
// Make BaseReg the operand that equals the destination.
589 if (DestReg
!= BaseReg
)
590 std::swap(BaseReg
, IndexReg
);
592 if (MI
.getOpcode() == X86::LEA64_32r
) {
593 // TODO: Do we need the super register implicit use?
594 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpcode
), DestReg
)
595 .addReg(BaseReg
).addReg(IndexReg
)
596 .addReg(Base
.getReg(), RegState::Implicit
)
597 .addReg(Index
.getReg(), RegState::Implicit
);
599 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpcode
), DestReg
)
600 .addReg(BaseReg
).addReg(IndexReg
);
602 } else if (DestReg
== BaseReg
&& IndexReg
== 0) {
604 // This is an LEA with only a base register and a displacement,
605 // We can use ADDri or INC/DEC.
607 // Does this LEA have one of these forms:
// lea %reg, 1(%reg)
609 // lea %reg, -1(%reg)
610 if (OptIncDec
&& (Disp
.getImm() == 1 || Disp
.getImm() == -1)) {
611 bool IsINC
= Disp
.getImm() == 1;
612 unsigned NewOpcode
= getINCDECFromLEA(MI
.getOpcode(), IsINC
);
614 if (MI
.getOpcode() == X86::LEA64_32r
) {
615 // TODO: Do we need the super register implicit use?
616 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpcode
), DestReg
)
617 .addReg(BaseReg
).addReg(Base
.getReg(), RegState::Implicit
);
619 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpcode
), DestReg
)
623 unsigned NewOpcode
= getADDriFromLEA(MI
.getOpcode(), Disp
);
624 if (MI
.getOpcode() == X86::LEA64_32r
) {
625 // TODO: Do we need the super register implicit use?
626 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpcode
), DestReg
)
627 .addReg(BaseReg
).addImm(Disp
.getImm())
628 .addReg(Base
.getReg(), RegState::Implicit
);
630 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpcode
), DestReg
)
631 .addReg(BaseReg
).addImm(Disp
.getImm());
634 } else if (BaseReg
!= 0 && IndexReg
!= 0 && Disp
.getImm() == 0) {
636 // Look for and transform the sequence
637 // lea (reg1, reg2), reg3
// followed by an add/sub that uses reg3 (handled by optLEAALU).
639 return optLEAALU(I
, MBB
);
643 MBB
.getParent()->substituteDebugValuesForInst(*I
, *NewMI
, 1);
// For an instruction with a memory operand (X86II::getMemoryOperandNo() >= 0),
// calls seekLEAFixup() on its base register operand and on its index register
// operand, skipping operands that are not registers or that are ESP.
// NOTE(review): only X86::ESP is excluded here, not X86::RSP — confirm
// whether the 64-bit stack pointer should be skipped as well.
649 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator
&I
,
650 MachineBasicBlock
&MBB
) {
651 // Process a load, store, or LEA instruction.
652 MachineInstr
&MI
= *I
;
653 const MCInstrDesc
&Desc
= MI
.getDesc();
654 int AddrOffset
= X86II::getMemoryOperandNo(Desc
.TSFlags
);
655 if (AddrOffset
>= 0) {
// Convert the memory-operand number into an operand index for MI.
656 AddrOffset
+= X86II::getOperandBias(Desc
);
657 MachineOperand
&p
= MI
.getOperand(AddrOffset
+ X86::AddrBaseReg
);
658 if (p
.isReg() && p
.getReg() != X86::ESP
) {
659 seekLEAFixup(p
, I
, MBB
);
661 MachineOperand
&q
= MI
.getOperand(AddrOffset
+ X86::AddrIndexReg
);
662 if (q
.isReg() && q
.getReg() != X86::ESP
) {
663 seekLEAFixup(q
, I
, MBB
);
// Uses searchBackwards() to find the instruction that writes register p
// before I. If one is found, asks postRAConvertToLEA() for an LEA
// replacement, moves debug values from the old instruction to the new one,
// and then runs processInstruction() on the new instruction.
// NOTE(review): the null check on NewMI, the statistic update and the erasure
// of the replaced instruction are missing from this view.
668 void FixupLEAPass::seekLEAFixup(MachineOperand
&p
,
669 MachineBasicBlock::iterator
&I
,
670 MachineBasicBlock
&MBB
) {
671 MachineBasicBlock::iterator MBI
= searchBackwards(p
, I
, MBB
);
672 if (MBI
!= MachineBasicBlock::iterator()) {
673 MachineInstr
*NewMI
= postRAConvertToLEA(MBB
, MBI
);
676 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI
->dump(););
677 // now to replace with an equivalent LEA...
678 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI
->dump(););
679 MBB
.getParent()->substituteDebugValuesForInst(*MBI
, *NewMI
, 1);
// Recurse on the freshly created LEA so its own address registers get the
// same treatment.
681 MachineBasicBlock::iterator J
=
682 static_cast<MachineBasicBlock::iterator
>(NewMI
);
683 processInstruction(J
, MBB
);
// Replaces LEA I with ADD instruction(s) writing the same destination.
// Conditions tested first: no segment register, an immediate displacement,
// EFLAGS dead at I (liveness queried with a neighborhood of 4), at least one
// of base/index equal to the destination, and a scale of at most 1.
// When both base and index are present a register-register ADD of the
// non-destination source is built; when the displacement is non-zero an ADD
// with that immediate is built. Debug values are then moved to NewMI.
// NOTE(review): the early returns, the assignment of the first BuildMI result,
// the register operand of the immediate ADD, and the erasure of the old
// instruction are missing from this view.
688 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator
&I
,
689 MachineBasicBlock
&MBB
) {
690 MachineInstr
&MI
= *I
;
691 const unsigned Opcode
= MI
.getOpcode();
693 const MachineOperand
&Dst
= MI
.getOperand(0);
694 const MachineOperand
&Base
= MI
.getOperand(1 + X86::AddrBaseReg
);
695 const MachineOperand
&Scale
= MI
.getOperand(1 + X86::AddrScaleAmt
);
696 const MachineOperand
&Index
= MI
.getOperand(1 + X86::AddrIndexReg
);
697 const MachineOperand
&Offset
= MI
.getOperand(1 + X86::AddrDisp
);
698 const MachineOperand
&Segment
= MI
.getOperand(1 + X86::AddrSegmentReg
);
700 if (Segment
.getReg() != 0 || !Offset
.isImm() ||
701 MBB
.computeRegisterLiveness(TRI
, X86::EFLAGS
, I
, 4) !=
702 MachineBasicBlock::LQR_Dead
)
704 const Register DstR
= Dst
.getReg();
705 const Register SrcR1
= Base
.getReg();
706 const Register SrcR2
= Index
.getReg();
// True when neither the base nor the index is the destination register.
707 if ((SrcR1
== 0 || SrcR1
!= DstR
) && (SrcR2
== 0 || SrcR2
!= DstR
))
709 if (Scale
.getImm() > 1)
711 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I
->dump(););
712 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
713 MachineInstr
*NewMI
= nullptr;
714 // Make ADD instruction for two registers writing to LEA's destination
715 if (SrcR1
!= 0 && SrcR2
!= 0) {
716 const MCInstrDesc
&ADDrr
= TII
->get(getADDrrFromLEA(Opcode
));
// Add whichever source is not already the destination.
717 const MachineOperand
&Src
= SrcR1
== DstR
? Index
: Base
;
719 BuildMI(MBB
, I
, MI
.getDebugLoc(), ADDrr
, DstR
).addReg(DstR
).add(Src
);
720 LLVM_DEBUG(NewMI
->dump(););
722 // Make ADD instruction for immediate
723 if (Offset
.getImm() != 0) {
724 const MCInstrDesc
&ADDri
=
725 TII
->get(getADDriFromLEA(Opcode
, Offset
));
726 const MachineOperand
&SrcR
= SrcR1
== DstR
? Base
: Index
;
727 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), ADDri
, DstR
)
729 .addImm(Offset
.getImm());
730 LLVM_DEBUG(NewMI
->dump(););
733 MBB
.getParent()->substituteDebugValuesForInst(*I
, *NewMI
, 1);
// Rewrites an LEA that is slow on the subtarget (three-operand form, or an
// inefficient base register combined with an index) into cheaper sequences.
// The rewrite is attempted only when the LEA is a three-operand LEA or has an
// inefficient base, EFLAGS is dead at I, and there is no segment register.
// Strategies visible here, in order:
//  1. base == index with scale 1: build another LEA (operands not visible);
//  2. scale 1 and destination equal to base or index: ADD of the other
//     register, followed by an ADD/INC/DEC for the offset if there is one;
//  3. efficient base (or efficient index with scale 1): LEA without the
//     offset, with base/index swapped when the base is inefficient, followed
//     by an ADD/INC/DEC for the offset;
//  4. remaining inefficient-base cases (LEA64_32r is tested separately): a
//     register copy plus ADD, or an LEA without the base plus ADD of the base.
// Each path redirects debug values from the old LEA to the last new
// instruction.
// NOTE(review): part of the signature, every early return, most BuildMI
// operand chains and the erasure of the old instruction are missing from
// this view.
739 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator
&I
,
740 MachineBasicBlock
&MBB
,
742 MachineInstr
&MI
= *I
;
743 const unsigned LEAOpcode
= MI
.getOpcode();
745 const MachineOperand
&Dest
= MI
.getOperand(0);
746 const MachineOperand
&Base
= MI
.getOperand(1 + X86::AddrBaseReg
);
747 const MachineOperand
&Scale
= MI
.getOperand(1 + X86::AddrScaleAmt
);
748 const MachineOperand
&Index
= MI
.getOperand(1 + X86::AddrIndexReg
);
749 const MachineOperand
&Offset
= MI
.getOperand(1 + X86::AddrDisp
);
750 const MachineOperand
&Segment
= MI
.getOperand(1 + X86::AddrSegmentReg
);
752 if (!(TII
->isThreeOperandsLEA(MI
) || hasInefficientLEABaseReg(Base
, Index
)) ||
753 MBB
.computeRegisterLiveness(TRI
, X86::EFLAGS
, I
, 4) !=
754 MachineBasicBlock::LQR_Dead
||
755 Segment
.getReg() != X86::NoRegister
)
758 Register DestReg
= Dest
.getReg();
759 Register BaseReg
= Base
.getReg();
760 Register IndexReg
= Index
.getReg();
762 if (MI
.getOpcode() == X86::LEA64_32r
) {
764 BaseReg
= TRI
->getSubReg(BaseReg
, X86::sub_32bit
);
766 IndexReg
= TRI
->getSubReg(IndexReg
, X86::sub_32bit
);
769 bool IsScale1
= Scale
.getImm() == 1;
770 bool IsInefficientBase
= isInefficientLEAReg(BaseReg
);
771 bool IsInefficientIndex
= isInefficientLEAReg(IndexReg
);
773 // Skip these cases since it takes more than 2 instructions
774 // to replace the LEA instruction.
775 if (IsInefficientBase
&& DestReg
== BaseReg
&& !IsScale1
)
778 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI
.dump(););
779 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
781 MachineInstr
*NewMI
= nullptr;
782 bool BaseOrIndexIsDst
= DestReg
== BaseReg
|| DestReg
== IndexReg
;
783 // First try and remove the base while sticking with LEA iff base == index and
784 // scale == 1. We can handle:
785 // 1. lea D(%base,%index,1) -> lea D(,%index,2)
786 // 2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)
787 // Only do this if the LEA would otherwise be split into 2 instructions
788 // (either it has an Offset or neither base nor index are dst)
789 if (IsScale1
&& BaseReg
== IndexReg
&&
790 (hasLEAOffset(Offset
) || (IsInefficientBase
&& !BaseOrIndexIsDst
))) {
791 NewMI
= BuildMI(MBB
, MI
, MI
.getDebugLoc(), TII
->get(LEAOpcode
))
798 LLVM_DEBUG(NewMI
->dump(););
800 MBB
.getParent()->substituteDebugValuesForInst(*I
, *NewMI
, 1);
804 } else if (IsScale1
&& BaseOrIndexIsDst
) {
805 // Try to replace LEA with one or two (for the 3-op LEA case)
// add instructions:
807 // 1.lea (%base,%index,1), %base => add %index,%base
808 // 2.lea (%base,%index,1), %index => add %base,%index
810 unsigned NewOpc
= getADDrrFromLEA(MI
.getOpcode());
// Make BaseReg the operand that equals the destination.
811 if (DestReg
!= BaseReg
)
812 std::swap(BaseReg
, IndexReg
);
814 if (MI
.getOpcode() == X86::LEA64_32r
) {
815 // TODO: Do we need the super register implicit use?
816 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpc
), DestReg
)
819 .addReg(Base
.getReg(), RegState::Implicit
)
820 .addReg(Index
.getReg(), RegState::Implicit
);
822 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpc
), DestReg
)
826 } else if (!IsInefficientBase
|| (!IsInefficientIndex
&& IsScale1
)) {
827 // If the base is inefficient try switching the index and base operands,
828 // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
829 // lea offset(%base,%index,scale),%dst =>
830 // lea (%base,%index,scale); add offset,%dst
831 NewMI
= BuildMI(MBB
, MI
, MI
.getDebugLoc(), TII
->get(LEAOpcode
))
833 .add(IsInefficientBase
? Index
: Base
)
835 .add(IsInefficientBase
? Base
: Index
)
838 LLVM_DEBUG(NewMI
->dump(););
841 // If either replacement succeeded above, add the offset if needed, then
842 // replace the instruction.
844 // Create ADD instruction for the Offset in case of 3-Ops LEA.
845 if (hasLEAOffset(Offset
)) {
846 if (OptIncDec
&& Offset
.isImm() &&
847 (Offset
.getImm() == 1 || Offset
.getImm() == -1)) {
849 getINCDECFromLEA(MI
.getOpcode(), Offset
.getImm() == 1);
850 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpc
), DestReg
)
852 LLVM_DEBUG(NewMI
->dump(););
854 unsigned NewOpc
= getADDriFromLEA(MI
.getOpcode(), Offset
);
855 NewMI
= BuildMI(MBB
, I
, MI
.getDebugLoc(), TII
->get(NewOpc
), DestReg
)
858 LLVM_DEBUG(NewMI
->dump(););
862 MBB
.getParent()->substituteDebugValuesForInst(*I
, *NewMI
, 1);
868 // Handle the rest of the cases with inefficient base register:
869 assert(DestReg
!= BaseReg
&& "DestReg == BaseReg should be handled already!");
870 assert(IsInefficientBase
&& "efficient base should be handled already!");
872 // FIXME: Handle LEA64_32r.
873 if (LEAOpcode
== X86::LEA64_32r
)
876 // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
877 if (IsScale1
&& !hasLEAOffset(Offset
)) {
// The copy may only kill the base when the following ADD does not read the
// same register as its index.
878 bool BIK
= Base
.isKill() && BaseReg
!= IndexReg
;
879 TII
->copyPhysReg(MBB
, MI
, MI
.getDebugLoc(), DestReg
, BaseReg
, BIK
);
880 LLVM_DEBUG(MI
.getPrevNode()->dump(););
882 unsigned NewOpc
= getADDrrFromLEA(MI
.getOpcode());
883 NewMI
= BuildMI(MBB
, MI
, MI
.getDebugLoc(), TII
->get(NewOpc
), DestReg
)
886 LLVM_DEBUG(NewMI
->dump(););
888 MBB
.getParent()->substituteDebugValuesForInst(*I
, *NewMI
, 1);
894 // lea offset(%base,%index,scale), %dst =>
895 // lea offset( ,%index,scale), %dst; add %base,%dst
896 NewMI
= BuildMI(MBB
, MI
, MI
.getDebugLoc(), TII
->get(LEAOpcode
))
903 LLVM_DEBUG(NewMI
->dump(););
905 unsigned NewOpc
= getADDrrFromLEA(MI
.getOpcode());
906 NewMI
= BuildMI(MBB
, MI
, MI
.getDebugLoc(), TII
->get(NewOpc
), DestReg
)
909 LLVM_DEBUG(NewMI
->dump(););
911 MBB
.getParent()->substituteDebugValuesForInst(*I
, *NewMI
, 1);