//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PowerPC implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-instr-info"

#define GET_INSTRMAP_INFO
#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
STATISTIC(NumStoreSPILLVSRRCAsVec,
          "Number of spillvsrrc spilled to stack as vec");
STATISTIC(NumStoreSPILLVSRRCAsGpr,
          "Number of spillvsrrc spilled to stack as gpr");
STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
STATISTIC(CmpIselsConverted,
          "Number of ISELs that depend on comparison of constants converted");
STATISTIC(MissedConvertibleImmediateInstrs,
          "Number of compare-immediate instructions fed by constants");
STATISTIC(NumRcRotatesConvertedToRcAnd,
          "Number of record-form rotates converted to record-form andi");
static cl::opt<bool>
DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
                   cl::desc("Disable analysis for CTR loops"));

static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
    cl::desc("Disable compare instruction optimization"), cl::Hidden);

static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
    cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
    cl::Hidden);

static cl::opt<bool>
UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
                  cl::desc("Use the old (incorrect) instruction latency calculation"));

static cl::opt<float>
FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
            cl::desc("register pressure factor for the transformations."));

static cl::opt<bool> EnableFMARegPressureReduction(
    "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
    cl::desc("enable register pressure reduce in machine combiner pass."));
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}

PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
    : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
                      /* CatchRetOpcode */ -1,
                      STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
      Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                           const ScheduleDAG *DAG) const {
  unsigned Directive =
      static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
  if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
      Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
    const InstrItineraryData *II =
        static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG);
  }

  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
/// to use for this target when scheduling the DAG.
ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                                 const ScheduleDAG *DAG) const {
  unsigned Directive =
      DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();

  // FIXME: Leaving this as-is until we have POWER9 scheduling info
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
    return new PPCDispatchGroupSBHazardRecognizer(II, DAG);

  // Most subtargets use a PPC970 recognizer.
  if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
      Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
    assert(DAG->TII && "No InstrInfo?");

    return new PPCHazardRecognizer970(*DAG);
  }

  return new ScoreboardHazardRecognizer(II, DAG);
}
unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                       const MachineInstr &MI,
                                       unsigned *PredCost) const {
  if (!ItinData || UseOldLatencyCalc)
    return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);

  // The default implementation of getInstrLatency calls getStageLatency, but
  // getStageLatency does not do the right thing for us. While we have
  // itinerary, most cores are fully pipelined, and so the itineraries only
  // express the first part of the pipeline, not every stage. Instead, we need
  // to use the listed output operand cycle number (using operand 0 here, which
  // is an output).
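  // Worked example (illustrative, not from the original source): if the
  // itinerary lists the def in operand 0 as becoming available in cycle 4,
  // the loop below yields Latency = max(1, 4) = 4, rather than the sum of
  // all pipeline stage cycles that getStageLatency would report.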
  unsigned Latency = 1;
  unsigned DefClass = MI.getDesc().getSchedClass();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
      continue;

    std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
    if (!Cycle)
      continue;

    Latency = std::max(Latency, *Cycle);
  }

  return Latency;
}
std::optional<unsigned> PPCInstrInfo::getOperandLatency(
    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
  std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
      ItinData, DefMI, DefIdx, UseMI, UseIdx);

  if (!DefMI.getParent())
    return Latency;

  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
  Register Reg = DefMO.getReg();

  bool IsRegCR;
  if (Reg.isVirtual()) {
    const MachineRegisterInfo *MRI =
        &DefMI.getParent()->getParent()->getRegInfo();
    IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
              MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
  } else {
    IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
              PPC::CRBITRCRegClass.contains(Reg);
  }

  if (UseMI.isBranch() && IsRegCR) {
    if (!Latency)
      Latency = getInstrLatency(ItinData, DefMI);

    // On some cores, there is an additional delay between writing to a
    // condition register, and using it from a branch.
    unsigned Directive = Subtarget.getCPUDirective();
    switch (Directive) {
    default: break;
    case PPC::DIR_7400:
    case PPC::DIR_750:
    case PPC::DIR_970:
    case PPC::DIR_E5500:
    case PPC::DIR_PWR4:
    case PPC::DIR_PWR5:
    case PPC::DIR_PWR5X:
    case PPC::DIR_PWR6:
    case PPC::DIR_PWR6X:
    case PPC::DIR_PWR7:
    case PPC::DIR_PWR8:
    // FIXME: Is this needed for POWER9?
      Latency = *Latency + 2;
      break;
    }
  }

  return Latency;
}
void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
                                         uint32_t Flags) const {
  MI.setFlags(Flags);
  MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
  MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
  MI.clearFlag(MachineInstr::MIFlag::IsExact);
}
// This function does not list all associative and commutative operations, but
// only those worth feeding through the machine combiner in an attempt to
// reduce the critical path. Mostly, this means floating-point operations,
// because they have high latencies (>= 5) compared to other operations, such
// as and/or, which are also associative and commutative but have low
// latencies.
bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                               bool Invert) const {
  if (Invert)
    return false;
  switch (Inst.getOpcode()) {
  // Floating-point add and multiply (scalar, Altivec, and VSX forms):
  case PPC::FADD:
  case PPC::FADDS:
  case PPC::FMUL:
  case PPC::FMULS:
  case PPC::VADDFP:
  case PPC::XSADDDP:
  case PPC::XVADDDP:
  case PPC::XVADDSP:
  case PPC::XSADDSP:
  case PPC::XSMULDP:
  case PPC::XVMULDP:
  case PPC::XVMULSP:
  case PPC::XSMULSP:
    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
           Inst.getFlag(MachineInstr::MIFlag::FmNsz);
  // Fixed-point multiply:
  case PPC::MULHD:
  case PPC::MULLD:
  case PPC::MULHW:
  case PPC::MULLW:
    return true;
  default:
    return false;
  }
}
#define InfoArrayIdxFMAInst 0
#define InfoArrayIdxFAddInst 1
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
#define InfoArrayIdxFSubInst 5
// Array keeps info for FMA instructions:
// Index 0(InfoArrayIdxFMAInst): FMA instruction;
// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
//                                second MUL operand index is plus 1;
// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
static const uint16_t FMAOpIdxInfo[][6] = {
    // FIXME: Add more FMA instructions like XSNMADDADP and so on.
    {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
    {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
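// Illustrative reading of the table (example added for clarity, not from the
// original source): in the VSX row {XSMADDADP, XSADDDP, XSMULDP, 1, 2,
// XSSUBDP}, "xsmaddadp XT, XA, XB" computes XT = XA * XB + XT, so the add
// operand is the tied operand at index 1 and the two multiplicands sit at
// indices 2 and 3. In the scalar row {FMADD, FADD, FMUL, 3, 1, FSUB},
// "fmadd FRT, FRA, FRC, FRB" computes FRT = FRA * FRC + FRB, so the add
// operand index is 3 and the multiplicands are at indices 1 and 2.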
// Check if an opcode is a FMA instruction. If it is, return the index in array
// FMAOpIdxInfo. Otherwise, return -1.
int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
  for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
    if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
      return I;
  return -1;
}
// On PowerPC target, we have two kinds of patterns related to FMA:
// 1: Improve ILP.
// Try to reassociate FMA chains like below:
//
// Pattern 1:
//   A = FADD X, Y          (Leaf)
//   B = FMA  A, M21, M22   (Prev)
//   C = FMA  B, M31, M32   (Root)
// -->
//   A = FMA X, M21, M22
//   B = FMA Y, M31, M32
//   C = FADD A, B
//
// Pattern 2:
//   A = FMA X, M11, M12    (Leaf)
//   B = FMA A, M21, M22    (Prev)
//   C = FMA B, M31, M32    (Root)
// -->
//   A = FMUL M11, M12
//   B = FMA  X, M21, M22
//   D = FMA  A, M31, M32
//   C = FADD B, D
//
// breaking the dependency between A and B, allowing FMA to be executed in
// parallel (or back-to-back in a pipeline) instead of depending on each other.
//
// 2: Reduce register pressure.
// Try to reassociate FMA with FSUB and a constant like below:
// C is a floating point const.
//
// Pattern 1:
//   A = FSUB X, Y     (Leaf)
//   D = FMA  B, C, A  (Root)
// -->
//   A = FMA B, Y, -C
//   D = FMA A, X, C
//
// Pattern 2:
//   A = FSUB X, Y     (Leaf)
//   D = FMA  B, A, C  (Root)
// -->
//   A = FMA B, Y, -C
//   D = FMA A, X, C
//
// Before the transformation, A must be assigned a different hardware register
// than D. After the transformation, A and D must be assigned the same hardware
// register due to the TIE attribute of FMA instructions.
bool PPCInstrInfo::getFMAPatterns(MachineInstr &Root,
                                  SmallVectorImpl<unsigned> &Patterns,
                                  bool DoRegPressureReduce) const {
  MachineBasicBlock *MBB = Root.getParent();
  const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();

  auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
    for (const auto &MO : Instr.explicit_operands())
      if (!(MO.isReg() && MO.getReg().isVirtual()))
        return false;
    return true;
  };
  auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
                                    unsigned OpType) {
    if (Instr.getOpcode() !=
        FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
      return false;

    // Instruction can be reassociated.
    // Fast math flags may prohibit reassociation.
    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
      return false;

    // Instruction operands must be virtual registers for reassociation.
    if (!IsAllOpsVirtualReg(Instr))
      return false;

    // For register pressure reassociation, the FSub must have only one use as
    // we want to delete the sub to save its def.
    if (OpType == InfoArrayIdxFSubInst &&
        !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
      return false;

    return true;
  };
  auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
                               int16_t &MulOpIdx, bool IsLeaf) {
    int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
    if (Idx < 0)
      return false;

    // Instruction can be reassociated.
    // Fast math flags may prohibit reassociation.
    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
      return false;

    // Instruction operands must be virtual registers for reassociation.
    if (!IsAllOpsVirtualReg(Instr))
      return false;

    MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
    if (IsLeaf)
      return true;

    AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];

    const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
    MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
    // If the 'add' operand's def is not in the current block, don't do ILP
    // related opt.
    if (!MIAdd || MIAdd->getParent() != MBB)
      return false;

    // If this is not a Leaf FMA instr, its 'add' operand should only have one
    // use, as this FMA will be changed later.
    return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
  };
  int16_t AddOpIdx = -1;
  int16_t MulOpIdx = -1;

  bool IsUsedOnceL = false;
  bool IsUsedOnceR = false;
  MachineInstr *MULInstrL = nullptr;
  MachineInstr *MULInstrR = nullptr;
  auto IsRPReductionCandidate = [&]() {
    // Currently, we only support float and double.
    // FIXME: add support for other types.
    unsigned Opcode = Root.getOpcode();
    if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
      return false;

    // Root must be a valid FMA like instruction.
    // Treat it as leaf as we don't care its add operand.
    if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
      assert((MulOpIdx >= 0) && "mul operand index not right!");
      Register MULRegL = TRI->lookThruSingleUseCopyChain(
          Root.getOperand(MulOpIdx).getReg(), MRI);
      Register MULRegR = TRI->lookThruSingleUseCopyChain(
          Root.getOperand(MulOpIdx + 1).getReg(), MRI);
      if (!MULRegL && !MULRegR)
        return false;

      if (MULRegL && !MULRegR) {
        MULRegR =
            TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
        IsUsedOnceL = true;
      } else if (!MULRegL && MULRegR) {
        MULRegL =
            TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
        IsUsedOnceR = true;
      } else {
        IsUsedOnceL = true;
        IsUsedOnceR = true;
      }

      if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
        return false;

      MULInstrL = MRI->getVRegDef(MULRegL);
      MULInstrR = MRI->getVRegDef(MULRegR);
      return true;
    }
    return false;
  };
  // Register pressure fma reassociation patterns.
  if (DoRegPressureReduce && IsRPReductionCandidate()) {
    assert((MULInstrL && MULInstrR) &&
           "wrong register pressure reduction candidate!");
    // Register pressure pattern 1
    if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
        IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
      LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
      Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_BCA);
      return true;
    }

    // Register pressure pattern 2
    if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
         IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
      LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
      Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_BAC);
      return true;
    }
  }
  // ILP fma reassociation patterns.
  // Root must be a valid FMA like instruction.
  AddOpIdx = -1;
  if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
    return false;

  assert((AddOpIdx >= 0) && "add operand index not right!");

  Register RegB = Root.getOperand(AddOpIdx).getReg();
  MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);

  // Prev must be a valid FMA like instruction.
  AddOpIdx = -1;
  if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
    return false;

  assert((AddOpIdx >= 0) && "add operand index not right!");

  Register RegA = Prev->getOperand(AddOpIdx).getReg();
  MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);

  if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
    Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM);
    LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
    return true;
  }
  if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
    Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM);
    LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
    return true;
  }
  return false;
}
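// Summary of the pattern names above (explanatory note, inferred from the
// code): REASSOC_XY_AMM_BMM matches the ILP chain whose Leaf is an FADD,
// REASSOC_XMM_AMM_BMM the chain whose Leaf is itself an FMA, while
// REASSOC_XY_BCA and REASSOC_XY_BAC are the register-pressure patterns in
// which the constant-pool multiplicand is Root's left or right MUL operand,
// respectively.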
void PPCInstrInfo::finalizeInsInstrs(
    MachineInstr &Root, unsigned &Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");

  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineConstantPool *MCP = MF->getConstantPool();

  int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
  assert(Idx >= 0 && "Root must be a FMA instruction");

  uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];

  // For now we only need to fix up the placeholder for the register pressure
  // reduce patterns.
  Register ConstReg = 0;
  switch (Pattern) {
  case PPCMachineCombinerPattern::REASSOC_XY_BCA:
    ConstReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
    break;
  case PPCMachineCombinerPattern::REASSOC_XY_BAC:
    ConstReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
    break;
  default:
    // Not register pressure reduce patterns.
    return;
  }

  MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
  // Get const value from const pool.
  const Constant *C = getConstantFromConstantPool(ConstDefInstr);
  assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");

  // Get negative fp const.
  APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
  F1.changeSign();
  Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
  Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());

  // Put negative fp const into constant pool.
  unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);

  MachineOperand *Placeholder = nullptr;
  // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
  for (auto *Inst : InsInstrs) {
    for (MachineOperand &Operand : Inst->explicit_operands()) {
      assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
      if (Operand.getReg() == PPC::ZERO8) {
        Placeholder = &Operand;
        break;
      }
    }
  }

  assert(Placeholder && "Placeholder does not exist!");

  // Generate instructions to load the const fp from constant pool.
  // We only support PPC64 and medium code model.
  Register LoadNewConst =
      generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);

  // Fill the placeholder with the new load from constant pool.
  Placeholder->setReg(LoadNewConst);
}
bool PPCInstrInfo::shouldReduceRegisterPressure(
    const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {

  if (!EnableFMARegPressureReduction)
    return false;

  // Currently, we only enable register pressure reducing in machine combiner
  // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
  // support.
  //
  // So we need following instructions to access a TOC entry:
  //
  // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
  // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
  //   killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
  //
  // FIXME: add more supported targets, like Small and Large code model, PPC32,
  // AIX.
  if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
        Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineFunction *MF = MBB->getParent();
  const MachineRegisterInfo *MRI = &MF->getRegInfo();

  auto GetMBBPressure =
      [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
    RegionPressure Pressure;
    RegPressureTracker RPTracker(Pressure);

    // Initialize the register pressure tracker.
    RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
                   /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

    for (const auto &MI : reverse(*MBB)) {
      if (MI.isDebugValue() || MI.isDebugLabel())
        continue;
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, *MRI, false, false);
      RPTracker.recedeSkipDebugValues();
      assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
      RPTracker.recede(RegOpers);
    }

    // Close the RPTracker to finalize live ins.
    RPTracker.closeRegion();

    return RPTracker.getPressure().MaxSetPressure;
  };

  // For now we only care about float and double type fma.
  unsigned VSSRCLimit =
      RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);

  // Only reduce register pressure when pressure is high.
  return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
         (float)VSSRCLimit * FMARPFactor;
}
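// Illustrative arithmetic (example values, not from the original source):
// with the default FMARPFactor of 1.5 and a VSSRC pressure-set limit of 32,
// the register-pressure patterns are attempted only when the block's maximum
// VSSRC pressure exceeds 48.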
bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const {
  // I has only one memory operand which is load from constant pool.
  if (!I->hasOneMemOperand())
    return false;

  MachineMemOperand *Op = I->memoperands()[0];
  return Op->isLoad() && Op->getPseudoValue() &&
         Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
}
Register PPCInstrInfo::generateLoadForNewConst(
    unsigned Idx, MachineInstr *MI, Type *Ty,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  // Now we only support PPC64, Medium code model and P9 with vector.
  // We have immutable pattern to access const pool. See function
  // shouldReduceRegisterPressure.
  assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
          Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) &&
         "Target not supported!\n");

  MachineFunction *MF = MI->getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  // Generate ADDIStocHA8
  Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
  MachineInstrBuilder TOCOffset =
      BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
          .addReg(PPC::X2)
          .addConstantPoolIndex(Idx);

  assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
         "Only float and double are supported!");

  unsigned LoadOpcode;
  // Should be float type or double type.
  if (Ty->isFloatTy())
    LoadOpcode = PPC::DFLOADf32;
  else
    LoadOpcode = PPC::DFLOADf64;

  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  Register VReg2 = MRI->createVirtualRegister(RC);
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad,
      Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty));

  // Generate Load from constant pool.
  MachineInstrBuilder Load =
      BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
          .addConstantPoolIndex(Idx)
          .addReg(VReg1, getKillRegState(true))
          .addMemOperand(MMO);

  Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);

  // Insert the toc load instructions into InsInstrs.
  InsInstrs.insert(InsInstrs.begin(), Load);
  InsInstrs.insert(InsInstrs.begin(), TOCOffset);
  return VReg2;
}
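// Net effect (illustrative): for constant-pool entry %const.N this prepends
//   VReg1 = ADDIStocHA8 $x2, %const.N
//   VReg2 = DFLOADf32/DFLOADf64 target-flags(ppc-toc-lo) %const.N, killed VReg1
// to InsInstrs, mirroring the TOC access sequence documented in
// shouldReduceRegisterPressure.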
// This function returns the const value in constant pool if the \p I is a load
// from constant pool.
const Constant *
PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {
  MachineFunction *MF = I->getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  MachineConstantPool *MCP = MF->getConstantPool();
  assert(I->mayLoad() && "Should be a load instruction.\n");
  for (auto MO : I->uses()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isVirtual())
      continue;
    // Find the toc address.
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    for (auto MO2 : DefMI->uses())
      if (MO2.isCPI())
        return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
  }
  return nullptr;
}
CombinerObjective PPCInstrInfo::getCombinerObjective(unsigned Pattern) const {
  switch (Pattern) {
  case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
  case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
    return CombinerObjective::MustReduceDepth;
  case PPCMachineCombinerPattern::REASSOC_XY_BCA:
  case PPCMachineCombinerPattern::REASSOC_XY_BAC:
    return CombinerObjective::MustReduceRegisterPressure;
  default:
    return TargetInstrInfo::getCombinerObjective(Pattern);
  }
}
bool PPCInstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
    bool DoRegPressureReduce) const {
  // Using the machine combiner in this way is potentially expensive, so
  // restrict to when aggressive optimizations are desired.
  if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOptLevel::Aggressive)
    return false;

  if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                     DoRegPressureReduce);
}
void PPCInstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, unsigned Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  switch (Pattern) {
  case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
  case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
  case PPCMachineCombinerPattern::REASSOC_XY_BCA:
  case PPCMachineCombinerPattern::REASSOC_XY_BAC:
    reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
    break;
  default:
    // Reassociate default patterns.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    break;
  }
}
void PPCInstrInfo::reassociateFMA(
    MachineInstr &Root, unsigned Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineOperand &OpC = Root.getOperand(0);
  Register RegC = OpC.getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(RegC);
  MRI.constrainRegClass(RegC, RC);

  unsigned FmaOp = Root.getOpcode();
  int16_t Idx = getFMAOpIdxInfo(FmaOp);
  assert(Idx >= 0 && "Root must be a FMA instruction");

  bool IsILPReassociate =
      (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) ||
      (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM);

  uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
  uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];

  MachineInstr *Prev = nullptr;
  MachineInstr *Leaf = nullptr;
  switch (Pattern) {
  default:
    llvm_unreachable("not recognized pattern!");
  case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
  case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
    Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
    Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
    break;
  case PPCMachineCombinerPattern::REASSOC_XY_BAC: {
    Register MULReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
    Leaf = MRI.getVRegDef(MULReg);
    break;
  }
  case PPCMachineCombinerPattern::REASSOC_XY_BCA: {
    Register MULReg = TRI->lookThruCopyLike(
        Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
    Leaf = MRI.getVRegDef(MULReg);
    break;
  }
  }

  uint32_t IntersectedFlags = 0;
  if (IsILPReassociate)
    IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
  else
    IntersectedFlags = Root.getFlags() & Leaf->getFlags();
  auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
                            bool &KillFlag) {
    Reg = Operand.getReg();
    MRI.constrainRegClass(Reg, RC);
    KillFlag = Operand.isKill();
  };

  auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
                             Register &MulOp2, Register &AddOp,
                             bool &MulOp1KillFlag, bool &MulOp2KillFlag,
                             bool &AddOpKillFlag) {
    GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
    GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
    GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
  };
  Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
      RegA21, RegB;
  bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
       KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
       KillA11 = false, KillA21 = false, KillB = false;

  GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);

  if (IsILPReassociate)
    GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);

  if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
    GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
    GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
  } else if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) {
    GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
    GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
  } else {
    // Get FSUB instruction info.
    GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
    GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
  }
  // Create new virtual registers for the new results instead of
  // recycling legacy ones because the MachineCombiner's computation of the
  // critical path requires a new register definition rather than an existing
  // one.
  // For register pressure reassociation, we only need to create one virtual
  // register for the new fma.
  Register NewVRA = MRI.createVirtualRegister(RC);
  InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));

  Register NewVRB = 0;
  if (IsILPReassociate) {
    NewVRB = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
  }

  Register NewVRD = 0;
  if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
    NewVRD = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
  }
  auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
                                Register RegMul1, bool KillRegMul1,
                                Register RegMul2, bool KillRegMul2) {
    MI->getOperand(AddOpIdx).setReg(RegAdd);
    MI->getOperand(AddOpIdx).setIsKill(KillAdd);
    MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
    MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
    MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
    MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
  };
  MachineInstrBuilder NewARegPressure, NewCRegPressure;
  switch (Pattern) {
  default:
    llvm_unreachable("not recognized pattern!");
  case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM: {
    // Create new instructions for insertion.
    MachineInstrBuilder MINewB =
        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
            .addReg(RegX, getKillRegState(KillX))
            .addReg(RegM21, getKillRegState(KillM21))
            .addReg(RegM22, getKillRegState(KillM22));
    MachineInstrBuilder MINewA =
        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
            .addReg(RegY, getKillRegState(KillY))
            .addReg(RegM31, getKillRegState(KillM31))
            .addReg(RegM32, getKillRegState(KillM32));
    // If AddOpIdx is not 1, adjust the order.
    if (AddOpIdx != 1) {
      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
      AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
    }

    MachineInstrBuilder MINewC =
        BuildMI(*MF, Root.getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
            .addReg(NewVRB, getKillRegState(true))
            .addReg(NewVRA, getKillRegState(true));

    // Update flags for newly created instructions.
    setSpecialOperandAttr(*MINewA, IntersectedFlags);
    setSpecialOperandAttr(*MINewB, IntersectedFlags);
    setSpecialOperandAttr(*MINewC, IntersectedFlags);

    // Record new instructions for insertion.
    InsInstrs.push_back(MINewA);
    InsInstrs.push_back(MINewB);
    InsInstrs.push_back(MINewC);
    break;
  }
  case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM: {
    assert(NewVRD && "new FMA register not created!");
    // Create new instructions for insertion.
    MachineInstrBuilder MINewA =
        BuildMI(*MF, Leaf->getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA)
            .addReg(RegM11, getKillRegState(KillM11))
            .addReg(RegM12, getKillRegState(KillM12));
    MachineInstrBuilder MINewB =
        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
            .addReg(RegX, getKillRegState(KillX))
            .addReg(RegM21, getKillRegState(KillM21))
            .addReg(RegM22, getKillRegState(KillM22));
    MachineInstrBuilder MINewD =
        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
            .addReg(NewVRA, getKillRegState(true))
            .addReg(RegM31, getKillRegState(KillM31))
            .addReg(RegM32, getKillRegState(KillM32));
    // If AddOpIdx is not 1, adjust the order.
    if (AddOpIdx != 1) {
      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
      AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
                         KillM32);
    }

    MachineInstrBuilder MINewC =
        BuildMI(*MF, Root.getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
            .addReg(NewVRB, getKillRegState(true))
            .addReg(NewVRD, getKillRegState(true));

    // Update flags for newly created instructions.
    setSpecialOperandAttr(*MINewA, IntersectedFlags);
    setSpecialOperandAttr(*MINewB, IntersectedFlags);
    setSpecialOperandAttr(*MINewD, IntersectedFlags);
    setSpecialOperandAttr(*MINewC, IntersectedFlags);

    // Record new instructions for insertion.
    InsInstrs.push_back(MINewA);
    InsInstrs.push_back(MINewB);
    InsInstrs.push_back(MINewD);
    InsInstrs.push_back(MINewC);
    break;
  }
  case PPCMachineCombinerPattern::REASSOC_XY_BAC:
  case PPCMachineCombinerPattern::REASSOC_XY_BCA: {
    Register VarReg;
    bool KillVarReg = false;
    if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_BCA) {
      VarReg = RegM31;
      KillVarReg = KillM31;
    } else {
      VarReg = RegM32;
      KillVarReg = KillM32;
    }
    // We don't want to get the negative const from the memory pool too early,
    // as the created entry will not be deleted even if it has no users. Since
    // all operands of Leaf and Root are virtual registers, we use the zero
    // register here as a placeholder. When the InsInstrs is selected in
    // MachineCombiner, we call finalizeInsInstrs to replace the zero register
    // with a virtual register which is a load from constant pool.
    NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
                          .addReg(RegB, getKillRegState(KillB))
                          .addReg(RegY, getKillRegState(KillY))
                          .addReg(PPC::ZERO8);
    NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
                          .addReg(NewVRA, getKillRegState(true))
                          .addReg(RegX, getKillRegState(KillX))
                          .addReg(VarReg, getKillRegState(KillVarReg));
    // For now, we only support xsmaddadp/xsmaddasp; their add operands are
    // both at index 1, so there is no need to adjust.
    // FIXME: when adding support for more fma instructions, like fma/fmas,
    // adjust the operand index here.
    break;
  }
  }
  if (!IsILPReassociate) {
    setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
    setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);

    InsInstrs.push_back(NewARegPressure);
    InsInstrs.push_back(NewCRegPressure);
  }

  assert(!InsInstrs.empty() &&
         "Insertion instructions set should not be empty!");

  // Record old instructions for deletion.
  DelInstrs.push_back(Leaf);
  if (IsILPReassociate)
    DelInstrs.push_back(Prev);
  DelInstrs.push_back(&Root);
}
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                         Register &SrcReg, Register &DstReg,
                                         unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: return false;
  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = PPC::sub_32;
    return true;
  }
}
Register PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
  if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
    // Check for the operands added by addFrameReference (the immediate is the
    // offset which defaults to 0).
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  return Register();
}
// For opcodes with the ReMaterializable flag set, this function is called to
// verify the instruction is really rematable.
bool PPCInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    // Let base implementation decide.
    break;
  case PPC::LI:
  case PPC::LI8:
  case PPC::LIS:
  case PPC::LIS8:
  case PPC::ADDIStocHA:
  case PPC::ADDIStocHA8:
  case PPC::ADDItocL8:
  case PPC::LOAD_STACK_GUARD:
  case PPC::PPCLdFixedAddr:
  case PPC::XXLXORspz:
  case PPC::XXLXORdpz:
  case PPC::XXLEQVOnes:
  case PPC::XXSPLTI32DX:
  case PPC::XXSPLTIDP:
  case PPC::V_SETALLONESB:
  case PPC::V_SETALLONESH:
  case PPC::V_SETALLONES:
  case PPC::XXSETACCZ:
  case PPC::XXSETACCZW:
    return true;
  }
  return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
Register PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex) const {
  if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  return Register();
}
MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  MachineFunction &MF = *MI.getParent()->getParent();

  // Normal instructions can be commuted the obvious way.
  if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
  // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
  // changing the relative order of the mask operands might change what happens
  // to the high-bits of the mask (and, thus, the result).

  // Cannot commute if it has a non-zero rotate count.
  if (MI.getOperand(3).getImm() != 0)
    return nullptr;

  // If we have a zero rotate count, we have:
  //   M = mask(MB,ME)
  //   Op0 = (Op1 & ~M) | (Op2 & M)
  // Change this to:
  //   M = mask((ME+1)&31, (MB-1)&31)
  //   Op0 = (Op2 & ~M) | (Op1 & M)
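  // Worked example (illustrative, not from the original source): for MB = 16,
  // ME = 27, M covers bits 16..27; the commuted mask((27+1)&31, (16-1)&31) =
  // mask(28, 15) wraps around to cover bits 28..31 and 0..15, i.e. exactly
  // ~M, so swapping Op1 and Op2 preserves the result.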
  // Swap op1/op2
  assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
         "Only the operands 1 and 2 can be swapped in RLWIMI/RLWIMI_rec.");
  Register Reg0 = MI.getOperand(0).getReg();
  Register Reg1 = MI.getOperand(1).getReg();
  Register Reg2 = MI.getOperand(2).getReg();
  unsigned SubReg1 = MI.getOperand(1).getSubReg();
  unsigned SubReg2 = MI.getOperand(2).getSubReg();
  bool Reg1IsKill = MI.getOperand(1).isKill();
  bool Reg2IsKill = MI.getOperand(2).isKill();
  bool ChangeReg0 = false;
  // If machine instrs are no longer in two-address forms, update
  // destination register as well.
  if (Reg0 == Reg1) {
    // Must be two address instruction (i.e. op1 is tied to op0).
    assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
           "Expecting a two-address instruction!");
    assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
    Reg2IsKill = false;
    ChangeReg0 = true;
  }

  // Masks.
  unsigned MB = MI.getOperand(4).getImm();
  unsigned ME = MI.getOperand(5).getImm();

  // We can't commute a trivial mask (there is no way to represent an all-zero
  // mask).
  if (MB == 0 && ME == 31)
    return nullptr;

  if (NewMI) {
    // Create a new instruction.
    Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
    bool Reg0IsDead = MI.getOperand(0).isDead();
    return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
        .addReg(Reg2, getKillRegState(Reg2IsKill))
        .addReg(Reg1, getKillRegState(Reg1IsKill))
        .addImm((ME + 1) & 31)
        .addImm((MB - 1) & 31);
  }

  if (ChangeReg0) {
    MI.getOperand(0).setReg(Reg2);
    MI.getOperand(0).setSubReg(SubReg2);
  }
  MI.getOperand(2).setReg(Reg1);
  MI.getOperand(1).setReg(Reg2);
  MI.getOperand(2).setSubReg(SubReg1);
  MI.getOperand(1).setSubReg(SubReg2);
  MI.getOperand(2).setIsKill(Reg1IsKill);
  MI.getOperand(1).setIsKill(Reg2IsKill);

  // Swap the mask around.
  MI.getOperand(4).setImm((ME + 1) & 31);
  MI.getOperand(5).setImm((MB - 1) & 31);
  return &MI;
}
bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
                                         unsigned &SrcOpIdx1,
                                         unsigned &SrcOpIdx2) const {
  // For VSX A-Type FMA instructions, it is the first two operands that can be
  // commuted; however, because the non-encoded tied input operand is listed
  // first, the operands to swap are actually the second and third.

  int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
  if (AltOpc == -1)
    return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);

  // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
  // and SrcOpIdx2.
  return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
}
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI) const {
  // This function is used for scheduling, and the nop wanted here is the type
  // that terminates dispatch groups on the POWER cores.
  unsigned Directive = Subtarget.getCPUDirective();
  unsigned Opcode;
  switch (Directive) {
  default:            Opcode = PPC::NOP; break;
  case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
  case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
  // FIXME: Update when the P8 instruction-scheduling model is ready.
  case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break;
  // FIXME: Update when POWER9 scheduling model is ready.
  case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
  }

  DebugLoc DL;
  BuildMI(MBB, MI, DL, get(Opcode));
}

/// Return the noop instruction to use for a noop.
MCInst PPCInstrInfo::getNop() const {
  MCInst Nop;
  Nop.setOpcode(PPC::NOP);
  return Nop;
}
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  bool isPPC64 = Subtarget.isPPC64();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  if (AllowModify) {
    // If the BB ends with an unconditional branch to the fallthrough BB,
    // we eliminate the branch instruction.
    if (I->getOpcode() == PPC::B &&
        MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
      I->eraseFromParent();

      // We update iterator after deleting the last branch.
      I = MBB.getLastNonDebugInstr();
      if (I == MBB.end() || !isUnpredicatedTerminator(*I))
        return false;
    }
  }

  // Get the last instruction in the block.
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (LastInst.getOpcode() == PPC::B) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastInst.getOpcode() == PPC::BCC) {
      if (!LastInst.getOperand(2).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(2).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      Cond.push_back(LastInst.getOperand(1));
      return false;
    } else if (LastInst.getOpcode() == PPC::BC) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BCn) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
               LastInst.getOpcode() == PPC::BDNZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      if (DisableCTRLoopAnal)
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(1));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDZ8 ||
               LastInst.getOpcode() == PPC::BDZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      if (DisableCTRLoopAnal)
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    }

    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr &SecondLastInst = *I;

  // If there are three terminators, we don't know what sort of block this is.
  if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with PPC::B and PPC:BCC, handle it.
  if (SecondLastInst.getOpcode() == PPC::BCC &&
      LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(2).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(2).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    Cond.push_back(SecondLastInst.getOperand(1));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BC &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BCn &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
              SecondLastInst.getOpcode() == PPC::BDNZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    if (DisableCTRLoopAnal)
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(1));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
              SecondLastInst.getOpcode() == PPC::BDZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    if (DisableCTRLoopAnal)
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(0));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two PPC:Bs, handle it. The second one is not
  // executed, so remove it.
  if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                    int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *TBB,
                                    MachineBasicBlock *FBB,
                                    ArrayRef<MachineOperand> Cond,
                                    const DebugLoc &DL,
                                    int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "PPC branch conditions have two components!");
  assert(!BytesAdded && "code size not handled");

  bool isPPC64 = Subtarget.isPPC64();

  // One-way branch.
  if (!FBB) {
    if (Cond.empty()) // Unconditional branch
      BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
    else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
      BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                            (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                            (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
      BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
      BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
    else // Conditional branch
      BuildMI(&MBB, DL, get(PPC::BCC))
          .addImm(Cond[0].getImm())
          .add(Cond[1])
          .addMBB(TBB);
    return 1;
  }

  // Two-way Conditional Branch.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                          (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                          (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
    BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
    BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
  else
    BuildMI(&MBB, DL, get(PPC::BCC))
        .addImm(Cond[0].getImm())
        .add(Cond[1])
        .addMBB(TBB);
  BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
  return 2;
}
// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                   ArrayRef<MachineOperand> Cond,
                                   Register DstReg, Register TrueReg,
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  if (!Subtarget.hasISEL())
    return false;

  if (Cond.size() != 2)
    return false;

  // If this is really a bdnz-like condition, then it cannot be turned into a
  // select.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    return false;

  // If the conditional branch uses a physical register, then it cannot be
  // turned into a select.
  if (Cond[1].getReg().isPhysical())
    return false;

  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // isel is for regular integer GPRs only.
  if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
      !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
      !PPC::G8RCRegClass.hasSubClassEq(RC) &&
      !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
    return false;

  // FIXME: These numbers are for the A2; how well they work for other cores is
  // an open question. On the A2, the isel instruction has a 2-cycle latency
  // but single-cycle throughput. These numbers are used in combination with
  // the MispredictPenalty setting from the active SchedMachineModel.
  CondCycles = 1;
  TrueCycles = 1;
  FalseCycles = 1;

  return true;
}
void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                const DebugLoc &dl, Register DestReg,
                                ArrayRef<MachineOperand> Cond, Register TrueReg,
                                Register FalseReg) const {
  assert(Cond.size() == 2 &&
         "PPC branch conditions have two components!");

  // Get the register classes.
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  assert(RC && "TrueReg and FalseReg must have overlapping register classes");

  bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
                 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
  assert((Is64Bit ||
          PPC::GPRCRegClass.hasSubClassEq(RC) ||
          PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
         "isel is for regular integer GPRs only");

  unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
  auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());

  unsigned SubIdx = 0;
  bool SwapOps = false;
  switch (SelectPred) {
  case PPC::PRED_EQ:
  case PPC::PRED_EQ_MINUS:
  case PPC::PRED_EQ_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = false; break;
  case PPC::PRED_NE:
  case PPC::PRED_NE_MINUS:
  case PPC::PRED_NE_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = true; break;
  case PPC::PRED_LT:
  case PPC::PRED_LT_MINUS:
  case PPC::PRED_LT_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = false; break;
  case PPC::PRED_GE:
  case PPC::PRED_GE_MINUS:
  case PPC::PRED_GE_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = true; break;
  case PPC::PRED_GT:
  case PPC::PRED_GT_MINUS:
  case PPC::PRED_GT_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = false; break;
  case PPC::PRED_LE:
  case PPC::PRED_LE_MINUS:
  case PPC::PRED_LE_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = true; break;
  case PPC::PRED_UN:
  case PPC::PRED_UN_MINUS:
  case PPC::PRED_UN_PLUS:
    SubIdx = PPC::sub_un; SwapOps = false; break;
  case PPC::PRED_NU:
  case PPC::PRED_NU_MINUS:
  case PPC::PRED_NU_PLUS:
    SubIdx = PPC::sub_un; SwapOps = true; break;
  case PPC::PRED_BIT_SET:   SubIdx = 0; SwapOps = false; break;
  case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
  }

  Register FirstReg = SwapOps ? FalseReg : TrueReg,
           SecondReg = SwapOps ? TrueReg : FalseReg;

  // The first input register of isel cannot be r0. If it is a member
  // of a register class that can be r0, then copy it first (the
  // register allocator should eliminate the copy).
  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
    const TargetRegisterClass *FirstRC =
        MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
            &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
    Register OldFirstReg = FirstReg;
    FirstReg = MRI.createVirtualRegister(FirstRC);
    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
        .addReg(OldFirstReg);
  }

  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
      .addReg(FirstReg).addReg(SecondReg)
      .addReg(Cond[1].getReg(), 0, SubIdx);
}
static unsigned getCRBitValue(unsigned CRBit) {
  unsigned Ret = 4;
  if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
      CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
      CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
      CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
    Ret = 3;
  if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
      CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
      CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
      CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
    Ret = 2;
  if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
      CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
      CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
      CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
    Ret = 1;
  if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
      CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
      CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
      CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
    Ret = 0;

  assert(Ret != 4 && "Invalid CR bit register");
  return Ret;
}
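// For example (illustrative): getCRBitValue(PPC::CR5EQ) returns 1, the EQ
// bit's position within its 4-bit CR field counted from the least-significant
// end (UN = 0, EQ = 1, GT = 2, LT = 3); copyPhysReg below combines this with
// the field's encoding value to form the RLWINM rotate amount.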
1676 void PPCInstrInfo::copyPhysReg(MachineBasicBlock
&MBB
,
1677 MachineBasicBlock::iterator I
,
1678 const DebugLoc
&DL
, MCRegister DestReg
,
1679 MCRegister SrcReg
, bool KillSrc
,
1680 bool RenamableDest
, bool RenamableSrc
) const {
1681 // We can end up with self copies and similar things as a result of VSX copy
1682 // legalization. Promote them here.
1683 const TargetRegisterInfo
*TRI
= &getRegisterInfo();
1684 if (PPC::F8RCRegClass
.contains(DestReg
) &&
1685 PPC::VSRCRegClass
.contains(SrcReg
)) {
1686 MCRegister SuperReg
=
1687 TRI
->getMatchingSuperReg(DestReg
, PPC::sub_64
, &PPC::VSRCRegClass
);
1689 if (VSXSelfCopyCrash
&& SrcReg
== SuperReg
)
1690 llvm_unreachable("nop VSX copy");
1693 } else if (PPC::F8RCRegClass
.contains(SrcReg
) &&
1694 PPC::VSRCRegClass
.contains(DestReg
)) {
1695 MCRegister SuperReg
=
1696 TRI
->getMatchingSuperReg(SrcReg
, PPC::sub_64
, &PPC::VSRCRegClass
);
1698 if (VSXSelfCopyCrash
&& DestReg
== SuperReg
)
1699 llvm_unreachable("nop VSX copy");
  // Different class register copy
  if (PPC::CRBITRCRegClass.contains(SrcReg) &&
      PPC::GPRCRegClass.contains(DestReg)) {
    MCRegister CRReg = getCRFromCRBit(SrcReg);
    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
    getKillRegState(KillSrc);
    // Rotate the CR bit in the CR fields to be the least significant bit and
    // then mask with 0x1 (MB = ME = 31).
    BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
        .addImm(31)
        .addImm(31);
    return;
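    // Illustrative expansion: copying CR2EQ into r3 emits
    //   mfocrf r3, <cr2>          (CR field 2 lands in bits 8-11)
    //   rlwinm r3, r3, 11, 31, 31 (2 * 4 + (4 - 1) = 11 rotates eq to the LSB)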
  } else if (PPC::CRRCRegClass.contains(SrcReg) &&
             (PPC::G8RCRegClass.contains(DestReg) ||
              PPC::GPRCRegClass.contains(DestReg))) {
    bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
    unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
    unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
    unsigned CRNum = TRI->getEncodingValue(SrcReg);
    BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    if (CRNum == 7)
      return;
    // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
    BuildMI(MBB, I, DL, get(ShCode), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(CRNum * 4 + 4)
        .addImm(28)
        .addImm(31);
    return;
  } else if (PPC::G8RCRegClass.contains(SrcReg) &&
             PPC::VSFRCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
    NumGPRtoVSRSpill++;
    getKillRegState(KillSrc);
    return;
  } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
             PPC::G8RCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::SPERCRegClass.contains(SrcReg) &&
             PPC::GPRCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::GPRCRegClass.contains(SrcReg) &&
             PPC::SPERCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  }

  unsigned Opc;
  if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR;
  else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR8;
  else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::FMR;
  else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::MCRF;
  else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::VOR;
  else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
    // There are two different ways this can be done:
    //   1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
    //      issue in VSU pipeline 0.
    //   2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
    //      can go to either pipeline.
    // We'll always use xxlor here, because in practically all cases where
    // copies are generated, they are close enough to some use that the
    // lower-latency form is preferable.
    Opc = PPC::XXLOR;
  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
           PPC::VSSRCRegClass.contains(DestReg, SrcReg))
    Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
  else if (Subtarget.pairedVectorMemops() &&
           PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
    if (SrcReg > PPC::VSRp15)
      SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
    else
      SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
    if (DestReg > PPC::VSRp15)
      DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
    else
      DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
      addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
      addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
    return;
  }
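  // Illustrative mapping: pairs below VSRp16 alias the low VSX registers
  // (VSRp1 -> vsl2/vsl3), while VSRp16 and above alias the Altivec registers
  // (VSRp17 -> v2/v3, i.e. vs34/vs35).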
  else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::CROR;
  else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::EVOR;
  else if ((PPC::ACCRCRegClass.contains(DestReg) ||
            PPC::UACCRCRegClass.contains(DestReg)) &&
           (PPC::ACCRCRegClass.contains(SrcReg) ||
            PPC::UACCRCRegClass.contains(SrcReg))) {
    // If primed, de-prime the source register, copy the individual registers
    // and prime the destination if needed. The vector subregisters are
    // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
    // source is primed, we need to re-prime it after the copy as well.
    PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
    bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
    bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
    MCRegister VSLSrcReg =
        PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    MCRegister VSLDestReg =
        PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    if (SrcPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
    for (unsigned Idx = 0; Idx < 4; Idx++)
      BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
          .addReg(VSLSrcReg + Idx)
          .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
    if (DestPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
    if (SrcPrimed && !KillSrc)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
    return;
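    // E.g. (illustrative): copying primed acc1 into unprimed uacc2 de-primes
    // acc1 with xxmfacc, copies vs4-vs7 into vs8-vs11 with xxlor, emits no
    // xxmtacc for the unprimed destination, and re-primes acc1 only when the
    // copy is not a kill.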
  } else if (PPC::G8pRCRegClass.contains(DestReg) &&
             PPC::G8pRCRegClass.contains(SrcReg)) {
    // TODO: Handle G8RC to G8pRC (and vice versa) copy.
    unsigned DestRegIdx = DestReg - PPC::G8p0;
    MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
    MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
    unsigned SrcRegIdx = SrcReg - PPC::G8p0;
    MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
    MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
        .addReg(SrcRegSub0)
        .addReg(SrcRegSub0, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
        .addReg(SrcRegSub1)
        .addReg(SrcRegSub1, getKillRegState(KillSrc));
    return;
  } else
    llvm_unreachable("Impossible reg-to-reg copy");

  const MCInstrDesc &MCID = get(Opc);
  if (MCID.getNumOperands() == 3)
    BuildMI(MBB, I, DL, MCID, DestReg)
        .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
  else
    BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
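// Note: the three-operand descriptors here are the or-style copies
// (or/or8/vor/xxlor, i.e. rD = rS | rS), while two-operand forms such as fmr
// or mcrf take a single source, which is what the operand-count check above
// dispatches on.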
unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
  int OpcodeIndex = 0;

  if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_Int4Spill;
  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_Int8Spill;
  } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_Float8Spill;
  } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_Float4Spill;
  } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_SPESpill;
  } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_CRSpill;
  } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_CRBitSpill;
  } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VRVectorSpill;
  } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VSXVectorSpill;
  } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VectorFloat8Spill;
  } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VectorFloat4Spill;
  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_SpillToVSR;
  } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
    OpcodeIndex = SOK_AccumulatorSpill;
  } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
    OpcodeIndex = SOK_UAccumulatorSpill;
  } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
    OpcodeIndex = SOK_WAccumulatorSpill;
  } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
    OpcodeIndex = SOK_PairedVecSpill;
  } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_PairedG8Spill;
  } else {
    llvm_unreachable("Unknown regclass!");
  }
  return OpcodeIndex;
}
unsigned
PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const {
  ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}

unsigned
PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const {
  ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}
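// For example, a G8RC (64-bit GPR) class resolves through getSpillIndex to
// SOK_Int8Spill, which the spill opcode arrays map to the std/ld store and
// reload opcodes.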
void PPCInstrInfo::StoreRegToStackSlot(
    MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
    const TargetRegisterClass *RC,
    SmallVectorImpl<MachineInstr *> &NewMIs) const {
  unsigned Opcode = getStoreOpcodeForSpill(RC);
  DebugLoc DL;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasSpills();

  NewMIs.push_back(addFrameReference(
      BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
      FrameIdx));

  if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
      PPC::CRBITRCRegClass.hasSubClassEq(RC))
    FuncInfo->setSpillsCR();

  if (isXFormMemOp(Opcode))
    FuncInfo->setHasNonRISpills();
}
void PPCInstrInfo::storeRegToStackSlotNoUpd(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg,
    bool isKill, int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  SmallVector<MachineInstr *, 4> NewMIs;

  StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);

  for (MachineInstr *NewMI : NewMIs)
    MBB.insert(MI, NewMI);

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIdx),
      MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
      MFI.getObjectAlign(FrameIdx));
  NewMIs.back()->addMemOperand(MF, MMO);
}
void PPCInstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
    bool isKill, int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI, Register VReg) const {
  // We need to avoid a situation in which the value from a VRRC register is
  // spilled using an Altivec instruction and reloaded into a VSRC register
  // using a VSX instruction. The issue with this is that the VSX
  // load/store instructions swap the doublewords in the vector and the Altivec
  // ones don't. The register classes on the spill/reload may be different if
  // the register is defined using an Altivec instruction and is then used by a
  // VSX instruction.
  RC = updatedRC(RC);
  storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
}
void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
                                        unsigned DestReg, int FrameIdx,
                                        const TargetRegisterClass *RC,
                                        SmallVectorImpl<MachineInstr *> &NewMIs)
    const {
  unsigned Opcode = getLoadOpcodeForSpill(RC);
  NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
                                     FrameIdx));
}
void PPCInstrInfo::loadRegFromStackSlotNoUpd(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg,
    int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  SmallVector<MachineInstr *, 4> NewMIs;
  DebugLoc DL;
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);

  for (MachineInstr *NewMI : NewMIs)
    MBB.insert(MI, NewMI);

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIdx),
      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
      MFI.getObjectAlign(FrameIdx));
  NewMIs.back()->addMemOperand(MF, MMO);
}
void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        Register DestReg, int FrameIdx,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI,
                                        Register VReg) const {
  // We need to avoid a situation in which the value from a VRRC register is
  // spilled using an Altivec instruction and reloaded into a VSRC register
  // using a VSX instruction. The issue with this is that the VSX
  // load/store instructions swap the doublewords in the vector and the Altivec
  // ones don't. The register classes on the spill/reload may be different if
  // the register is defined using an Altivec instruction and is then used by a
  // VSX instruction.
  RC = updatedRC(RC);
  loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
}
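// Note: both the store path above and this reload path normalize the class
// with updatedRC() first, so a value spilled with a VSX instruction is also
// reloaded with one; mixing the Altivec and VSX forms would swap the two
// doublewords of the vector (see the comment above).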
bool PPCInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
  if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
    Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
  else
    // Leave the CR# the same, but invert the condition.
    Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));

  return false;
}
// For some instructions, it is legal to fold ZERO into the RA register field.
// This function performs that fold by replacing the operand with PPC::ZERO;
// it does not consider whether the load immediate zero is no longer in use.
bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                     Register Reg) const {
  // A zero immediate should always be loaded with a single li.
  unsigned DefOpc = DefMI.getOpcode();
  if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
    return false;
  if (!DefMI.getOperand(1).isImm())
    return false;
  if (DefMI.getOperand(1).getImm() != 0)
    return false;

  // Note that we cannot here invert the arguments of an isel in order to fold
  // a ZERO into what is presented as the second argument. All we have here
  // is the condition bit, and that might come from a CR-logical bit operation.

  const MCInstrDesc &UseMCID = UseMI.getDesc();

  // Only fold into real machine instructions.
  if (UseMCID.isPseudo())
    return false;

  // We need to find which of the User's operands is to be folded, that will be
  // the operand that matches the given register ID.
  unsigned UseIdx;
  for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
    if (UseMI.getOperand(UseIdx).isReg() &&
        UseMI.getOperand(UseIdx).getReg() == Reg)
      break;

  assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
  assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");

  const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];

  // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
  // register (which might also be specified as a pointer class kind).
  if (UseInfo->isLookupPtrRegClass()) {
    if (UseInfo->RegClass /* Kind */ != 1)
      return false;
  } else {
    if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
        UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
      return false;
  }

  // Make sure this is not tied to an output register (or otherwise
  // constrained). This is true for ST?UX registers, for example, which
  // are tied to their output registers.
  if (UseInfo->Constraints != 0)
    return false;

  MCRegister ZeroReg;
  if (UseInfo->isLookupPtrRegClass()) {
    bool isPPC64 = Subtarget.isPPC64();
    ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
  } else {
    ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
              PPC::ZERO8 : PPC::ZERO;
  }

  LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
  LLVM_DEBUG(UseMI.dump());
  UseMI.getOperand(UseIdx).setReg(ZeroReg);
  LLVM_DEBUG(dbgs() << "Into: ");
  LLVM_DEBUG(UseMI.dump());
  return true;
}
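// Illustrative case: with r5 defined by "li r5, 0", a use such as
// "lwzx r3, r5, r4" can have r5 rewritten to the zero register (the RA field
// of these memory ops reads as 0), after which foldImmediate() below can
// delete the now-dead li.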
// Folds zero into instructions which have a load immediate zero as an operand
// but also recognize zero as immediate zero. If the definition of the load
// has no more users it is deleted.
bool PPCInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 Register Reg, MachineRegisterInfo *MRI) const {
  bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
  if (MRI->use_nodbg_empty(Reg))
    DefMI.eraseFromParent();
  return Changed;
}
static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
  for (MachineInstr &MI : MBB)
    if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
        MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
      return true;
  return false;
}
// We should make sure that, if we're going to predicate both sides of a
// condition (a diamond), that both sides don't define the counter register. We
// can predicate counter-decrement-based branches, but while that predicates
// the branching, it does not predicate the counter decrement. If we tried to
// merge the triangle into one predicated block, we'd decrement the counter
// twice.
bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                       unsigned NumT, unsigned ExtraT,
                                       MachineBasicBlock &FMBB,
                                       unsigned NumF, unsigned ExtraF,
                                       BranchProbability Probability) const {
  return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
}
bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
  // The predicated branches are identified by their type, not really by the
  // explicit presence of a predicate. Furthermore, some of them can be
  // predicated more than once. Because if conversion won't try to predicate
  // any instruction which already claims to be predicated (by returning true
  // here), always return false. In doing so, we let isPredicable() be the
  // final word on whether or not the instruction can be (further) predicated.
  return false;
}
bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                        const MachineBasicBlock *MBB,
                                        const MachineFunction &MF) const {
  switch (MI.getOpcode()) {
  default:
    break;
  // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
  // across them, since some FP operations may change content of FPSCR.
  // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
  case PPC::MFFS:
  case PPC::MTFSF:
  case PPC::FENCE:
    return true;
  }

  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
}
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
                                        ArrayRef<MachineOperand> Pred) const {
  unsigned OpC = MI.getOpcode();
  if (OpC == PPC::BLR || OpC == PPC::BLR8) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
      bool isPPC64 = Subtarget.isPPC64();
      MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
                                      : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
      // Need add Def and Use for CTR implicit operand.
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(Pred[1].getReg(), RegState::Implicit)
          .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      MI.setDesc(get(PPC::BCLR));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      MI.setDesc(get(PPC::BCLRn));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else {
      MI.setDesc(get(PPC::BCCLR));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1]);
    }
    return true;
  } else if (OpC == PPC::B) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
      bool isPPC64 = Subtarget.isPPC64();
      MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
                                      : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
      // Need add Def and Use for CTR implicit operand.
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(Pred[1].getReg(), RegState::Implicit)
          .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BC));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .add(Pred[1])
          .addMBB(MBB);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BCn));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .add(Pred[1])
          .addMBB(MBB);
    } else {
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BCC));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1])
          .addMBB(MBB);
    }
    return true;
  } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
             OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
             OpC == PPC::BCTRL8_RM) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
      llvm_unreachable("Cannot predicate bctr[l] on the ctr register");

    bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
                 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
    bool isPPC64 = Subtarget.isPPC64();

    if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
                             : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
                             : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
                             : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1]);
    }

    // Need add Def and Use for LR implicit operand.
    if (setLR)
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
          .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
    if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(PPC::RM, RegState::ImplicitDefine);

    return true;
  }

  return false;
}
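// For example, predicating a blr on (PPC::PRED_LT, cr0) turns it into a bcclr
// carrying that predicate and CR operand, while a CTR-based predicate turns
// it into bdnzlr/bdzlr with CTR added as an implicit use and def.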
bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
                                     ArrayRef<MachineOperand> Pred2) const {
  assert(Pred1.size() == 2 && "Invalid PPC first predicate");
  assert(Pred2.size() == 2 && "Invalid PPC second predicate");

  if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
    return false;
  if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
    return false;

  // P1 can only subsume P2 if they test the same condition register.
  if (Pred1[1].getReg() != Pred2[1].getReg())
    return false;

  PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
  PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();

  if (P1 == P2)
    return true;

  // Does P1 subsume P2, e.g. GE subsumes GT.
  if (P1 == PPC::PRED_LE &&
      (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
    return true;
  if (P1 == PPC::PRED_GE &&
      (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
    return true;

  return false;
}
bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
                                     std::vector<MachineOperand> &Pred,
                                     bool SkipDead) const {
  // Note: At the present time, the contents of Pred from this function are
  // unused by IfConversion. This implementation follows ARM by pushing the
  // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
  // predicate, instructions defining CTR or CTR8 are also included as
  // predicate-defining instructions.

  const TargetRegisterClass *RCs[] =
    { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
      &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };

  bool Found = false;
  for (const MachineOperand &MO : MI.operands()) {
    for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
      const TargetRegisterClass *RC = RCs[c];
      if (MO.isReg()) {
        if (MO.isDef() && RC->contains(MO.getReg())) {
          Pred.push_back(MO);
          Found = true;
        }
      } else if (MO.isRegMask()) {
        for (MCPhysReg R : *RC)
          if (MO.clobbersPhysReg(R)) {
            Pred.push_back(MO);
            Found = true;
          }
      }
    }
  }

  return Found;
}
bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                  Register &SrcReg2, int64_t &Mask,
                                  int64_t &Value) const {
  unsigned Opc = MI.getOpcode();

  switch (Opc) {
  default: return false;

  case PPC::CMPWI:
  case PPC::CMPLWI:
  case PPC::CMPDI:
  case PPC::CMPLDI:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    Value = MI.getOperand(2).getImm();
    Mask = 0xFFFF;
    return true;

  case PPC::CMPW:
  case PPC::CMPLW:
  case PPC::CMPD:
  case PPC::CMPLD:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    Value = 0;
    Mask = 0;
    return true;
  }
}
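// E.g. for "CMPWI %r, 5" this reports SrcReg = %r, Value = 5, Mask = 0xFFFF;
// for the register-register forms SrcReg2 carries the second source and
// Value/Mask are zero.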
bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                                        Register SrcReg2, int64_t Mask,
                                        int64_t Value,
                                        const MachineRegisterInfo *MRI) const {
  if (DisableCmpOpt)
    return false;

  int OpC = CmpInstr.getOpcode();
  Register CRReg = CmpInstr.getOperand(0).getReg();

  // FP record forms set CR1 based on the exception status bits, not a
  // comparison with zero.
  if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // The record forms set the condition register based on a signed comparison
  // with zero (so says the ISA manual). This is not as straightforward as it
  // seems, however, because this is always a 64-bit comparison on PPC64, even
  // for instructions that are 32-bit in nature (like slw for example).
  // So, on PPC32, for unsigned comparisons, we can use the record forms only
  // for equality checks (as those don't depend on the sign). On PPC64,
  // we are restricted to equality for unsigned 64-bit comparisons and for
  // signed 32-bit comparisons the applicability is more restricted.
  bool isPPC64 = Subtarget.isPPC64();
  bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
  bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
  bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;

  // Look through copies unless that gets us to a physical register.
  Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
  if (ActualSrc.isVirtual())
    SrcReg = ActualSrc;

  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;
  bool equalityOnly = false;
  bool noSub = false;
  if (isPPC64) {
    if (is32BitSignedCompare) {
      // We can perform this optimization only if SrcReg is sign-extending.
      if (isSignExtended(SrcReg, MRI))
        noSub = true;
      else
        return false;
    } else if (is32BitUnsignedCompare) {
      // We can perform this optimization, equality only, if SrcReg is
      // zero-extending.
      if (isZeroExtended(SrcReg, MRI)) {
        noSub = true;
        equalityOnly = true;
      } else
        return false;
    } else
      equalityOnly = is64BitUnsignedCompare;
  } else
    equalityOnly = is32BitUnsignedCompare;

  if (equalityOnly) {
    // We need to check the uses of the condition register in order to reject
    // non-equality comparisons.
    for (MachineRegisterInfo::use_instr_iterator
         I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
         I != IE; ++I) {
      MachineInstr *UseMI = &*I;
      if (UseMI->getOpcode() == PPC::BCC) {
        PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
        unsigned PredCond = PPC::getPredicateCondition(Pred);
        // We ignore hint bits when checking for non-equality comparisons.
        if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
          return false;
      } else if (UseMI->getOpcode() == PPC::ISEL ||
                 UseMI->getOpcode() == PPC::ISEL8) {
        unsigned SubIdx = UseMI->getOperand(3).getSubReg();
        if (SubIdx != PPC::sub_eq)
          return false;
      } else
        return false;
    }
  }
;
2461 // Scan forward to find the first use of the compare.
2462 for (MachineBasicBlock::iterator EL
= CmpInstr
.getParent()->end(); I
!= EL
;
2464 bool FoundUse
= false;
2465 for (MachineRegisterInfo::use_instr_iterator
2466 J
= MRI
->use_instr_begin(CRReg
), JE
= MRI
->use_instr_end();
2477 SmallVector
<std::pair
<MachineOperand
*, PPC::Predicate
>, 4> PredsToUpdate
;
2478 SmallVector
<std::pair
<MachineOperand
*, unsigned>, 4> SubRegsToUpdate
;
2480 // There are two possible candidates which can be changed to set CR[01].
2481 // One is MI, the other is a SUB instruction.
2482 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2483 MachineInstr
*Sub
= nullptr;
2485 // MI is not a candidate for CMPrr.
2487 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2488 // same BB as the comparison. This is to allow the check below to avoid calls
2489 // (and other explicit clobbers); instead we should really check for these
2490 // more explicitly (in at least a few predecessors).
2491 else if (MI
->getParent() != CmpInstr
.getParent())
  else if (Value != 0) {
    // The record-form instructions set CR bit based on signed comparison
    // against 0. We try to convert a compare against 1 or -1 into a compare
    // against 0 to exploit record-form instructions. For example, we change
    // the condition "greater than -1" into "greater than or equal to 0"
    // and "less than 1" into "less than or equal to 0".

    // Since we optimize comparison based on a specific branch condition,
    // we don't optimize if the condition code is used by more than one branch.
    if (equalityOnly || !MRI->hasOneUse(CRReg))
      return false;

    MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
    if (UseMI->getOpcode() != PPC::BCC)
      return false;

    PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
    unsigned PredCond = PPC::getPredicateCondition(Pred);
    unsigned PredHint = PPC::getPredicateHint(Pred);
    int16_t Immed = (int16_t)Value;

    // When modifying the condition in the predicate, we propagate hint bits
    // from the original predicate to the new one.
    if (Immed == -1 && PredCond == PPC::PRED_GT)
      // We convert "greater than -1" into "greater than or equal to 0",
      // since we are assuming signed comparison by !equalityOnly.
      Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
    else if (Immed == -1 && PredCond == PPC::PRED_LE)
      // We convert "less than or equal to -1" into "less than 0".
      Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
    else if (Immed == 1 && PredCond == PPC::PRED_LT)
      // We convert "less than 1" into "less than or equal to 0".
      Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
    else if (Immed == 1 && PredCond == PPC::PRED_GE)
      // We convert "greater than or equal to 1" into "greater than 0".
      Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
    else
      return false;

    // Convert the comparison and its user to a compare against zero with the
    // appropriate predicate on the branch. Zero comparison might provide
    // optimization opportunities post-RA (see optimization in
    // PPCPreEmitPeephole.cpp).
    UseMI->getOperand(0).setImm(Pred);
    CmpInstr.getOperand(2).setImm(0);
  }
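  // Worked example: "cmpwi cr0, rX, 1" followed by "blt" tests rX < 1, i.e.
  // rX <= 0, so the pair is rewritten to "cmpwi cr0, rX, 0" with "ble",
  // letting the compare against zero be folded into a record form below.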
  // Search for Sub.
  --I;

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();

  for (; I != E && !noSub; --I) {
    const MachineInstr &Instr = *I;
    unsigned IOpC = Instr.getOpcode();

    if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
                             Instr.readsRegister(PPC::CR0, TRI)))
      // This instruction modifies or uses the record condition register after
      // the one we want to change. While we could do this transformation, it
      // would likely not be profitable. This transformation removes one
      // instruction, and so even forcing RA to generate one move probably
      // makes it unprofitable.
      return false;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
         OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
        (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
        ((Instr.getOperand(1).getReg() == SrcReg &&
          Instr.getOperand(2).getReg() == SrcReg2) ||
         (Instr.getOperand(1).getReg() == SrcReg2 &&
          Instr.getOperand(2).getReg() == SrcReg))) {
      Sub = &*I;
      break;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Return false if no candidates exist.
  if (!MI && !Sub)
    return false;

  // The single candidate is called MI.
  if (!MI) MI = Sub;

  int NewOpC;
  int MIOpC = MI->getOpcode();
  if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
      MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
    NewOpC = MIOpC;
  else {
    NewOpC = PPC::getRecordFormOpcode(MIOpC);
    if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
      NewOpC = MIOpC;
  }

  // FIXME: On the non-embedded POWER architectures, only some of the record
  // forms are fast, and we should use only the fast ones.

  // The defining instruction has a record form (or is already a record
  // form). It is possible, however, that we'll need to reverse the condition
  // code of the users.
  if (NewOpC == -1)
    return false;

  // This transformation should not be performed if `nsw` is missing and is not
  // an `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
  // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
  // CRReg can reflect if compared values are equal, this optz is still valid.
  if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
      Sub && !Sub->getFlag(MachineInstr::NoSWrap))
    return false;

  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
  // needs to be updated to be based on SUB. Push the condition code
  // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
  // condition code of these operands will be modified.
  // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
  // comparison against 0, which may modify predicate.
  bool ShouldSwap = false;
  if (Sub && Value == 0) {
    ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
      Sub->getOperand(2).getReg() == SrcReg;

    // The operands to subf are the opposite of sub, so only in the fixed-point
    // case, invert the order.
    ShouldSwap = !ShouldSwap;
  }

  if (ShouldSwap)
    for (MachineRegisterInfo::use_instr_iterator
         I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
         I != IE; ++I) {
      MachineInstr *UseMI = &*I;
      if (UseMI->getOpcode() == PPC::BCC) {
        PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
        unsigned PredCond = PPC::getPredicateCondition(Pred);
        assert((!equalityOnly ||
                PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
               "Invalid predicate for equality-only optimization");
        (void)PredCond; // To suppress warning in release build.
        PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
                                PPC::getSwappedPredicate(Pred)));
      } else if (UseMI->getOpcode() == PPC::ISEL ||
                 UseMI->getOpcode() == PPC::ISEL8) {
        unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
        assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
               "Invalid CR bit for equality-only optimization");

        if (NewSubReg == PPC::sub_lt)
          NewSubReg = PPC::sub_gt;
        else if (NewSubReg == PPC::sub_gt)
          NewSubReg = PPC::sub_lt;

        SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
                                                 NewSubReg));
      } else // We need to abort on a user we don't understand.
        return false;
    }
  assert(!(Value != 0 && ShouldSwap) &&
         "Non-zero immediate support and ShouldSwap "
         "may conflict in updating predicate");
  // Create a new virtual register to hold the value of the CR set by the
  // record-form instruction. If the instruction was not previously in
  // record form, then set the kill flag on the CR.
  CmpInstr.eraseFromParent();

  MachineBasicBlock::iterator MII = MI;
  BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
          get(TargetOpcode::COPY), CRReg)
      .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);

  // Even if CR0 register were dead before, it is alive now since the
  // instruction we just built uses it.
  MI->clearRegisterDeads(PPC::CR0);

  if (MIOpC != NewOpC) {
    // We need to be careful here: we're replacing one instruction with
    // another, and we need to make sure that we get all of the right
    // implicit uses and defs. On the other hand, the caller may be holding
    // an iterator to this instruction, and so we can't delete it (this is
    // specifically the case if this is the instruction directly after the
    // compare).

    // Rotates are expensive instructions. If we're emitting a record-form
    // rotate that can just be an andi/andis, we should just emit that.
    if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
      Register GPRRes = MI->getOperand(0).getReg();
      int64_t SH = MI->getOperand(2).getImm();
      int64_t MB = MI->getOperand(3).getImm();
      int64_t ME = MI->getOperand(4).getImm();
      // We can only do this if both the start and end of the mask are in the
      // same halfword.
      bool MBInLoHWord = MB >= 16;
      bool MEInLoHWord = ME >= 16;
      uint64_t Mask = ~0LLU;

      if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
        Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
        // The mask value needs to shift right 16 if we're emitting andis.
        Mask >>= MBInLoHWord ? 0 : 16;
        NewOpC = MIOpC == PPC::RLWINM
                     ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
                     : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
      } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
                 (ME - MB + 1 == SH) && (MB >= 16)) {
        // If we are rotating by the exact number of bits as are in the mask
        // and the mask is in the least significant bits of the register,
        // that's just an andis. (as long as the GPR result has no uses).
        Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
        Mask >>= 16;
        NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
      }

      // If we've set the mask, we can transform.
      if (Mask != ~0LLU) {
        MI->removeOperand(4);
        MI->removeOperand(3);
        MI->getOperand(2).setImm(Mask);
        NumRcRotatesConvertedToRcAnd++;
      }
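      // Worked example: "rlwinm. rD, rS, 0, 24, 31" (SH = 0, mask 0xFF)
      // computes Mask = ((1 << 8) - 1) = 0xFF and is rewritten as
      // "andi. rD, rS, 255", avoiding the more expensive record-form rotate.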
    } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
      int64_t MB = MI->getOperand(3).getImm();
      if (MB >= 48) {
        uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
        NewOpC = PPC::ANDI8_rec;
        MI->removeOperand(3);
        MI->getOperand(2).setImm(Mask);
        NumRcRotatesConvertedToRcAnd++;
      }
    }

    const MCInstrDesc &NewDesc = get(NewOpC);
    MI->setDesc(NewDesc);

    for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
      if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
        MI->addOperand(*MI->getParent()->getParent(),
                       MachineOperand::CreateReg(ImpDef, true, true));
      }
    }
    for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
      if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
        MI->addOperand(*MI->getParent()->getParent(),
                       MachineOperand::CreateReg(ImpUse, false, true));
      }
    }
  }
  assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
         "Record-form instruction does not define cr0?");

  // Modify the condition code of operands in OperandsToUpdate.
  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
  for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
    PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);

  for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
    SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);

  return true;
}
bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
  MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
  if (MRI->isSSA())
    return false;

  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
    return false;

  // Try to optimize the comparison against 0.
  if (CmpValue || !CmpMask || SrcReg2)
    return false;

  // The record forms set the condition register based on a signed comparison
  // with zero (see comments in optimizeCompareInstr). Since we can't do the
  // equality checks in post-RA, we are more restricted on an unsigned
  // comparison.
  unsigned Opc = CmpMI.getOpcode();
  if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
    return false;

  // The record forms are always based on a 64-bit comparison on PPC64
  // (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
  // comparison. Since we can't do the equality checks in post-RA, we bail out
  // conservatively.
  if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
    return false;

  // CmpMI can't be deleted if it has implicit def.
  if (CmpMI.hasImplicitDef())
    return false;

  bool SrcRegHasOtherUse = false;
  MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
  if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
    return false;

  MachineOperand RegMO = CmpMI.getOperand(0);
  Register CRReg = RegMO.getReg();
  if (CRReg != PPC::CR0)
    return false;

  // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
  bool SeenUseOfCRReg = false;
  bool IsCRRegKilled = false;
  if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
                                 SeenUseOfCRReg) ||
      SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
    return false;

  int SrcMIOpc = SrcMI->getOpcode();
  int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
  if (NewOpC == -1)
    return false;

  LLVM_DEBUG(dbgs() << "Replace Instr: ");
  LLVM_DEBUG(SrcMI->dump());

  const MCInstrDesc &NewDesc = get(NewOpC);
  SrcMI->setDesc(NewDesc);
  MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
      .addReg(CRReg, RegState::ImplicitDefine);
  SrcMI->clearRegisterDeads(CRReg);

  assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
         "Record-form instruction does not define cr0?");

  LLVM_DEBUG(dbgs() << "with: ");
  LLVM_DEBUG(SrcMI->dump());
  LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
  LLVM_DEBUG(CmpMI.dump());
  CmpMI.eraseFromParent();
  return true;
}
bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
    const TargetRegisterInfo *TRI) const {
  const MachineOperand *BaseOp;
  OffsetIsScalable = false;
  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
    return false;
  BaseOps.push_back(BaseOp);
  return true;
}
static bool isLdStSafeToCluster(const MachineInstr &LdSt,
                                const TargetRegisterInfo *TRI) {
  // If this is a volatile load/store, don't mess with it.
  if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
    return false;

  if (LdSt.getOperand(2).isFI())
    return true;

  assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
  // Can't cluster if the instruction modifies the base register
  // or it is update form. e.g. ld r2,3(r2)
  if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
    return false;

  return true;
}
// Only cluster instruction pairs that have the same opcode and are
// clusterable according to the PowerPC specification.
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
                                     const PPCSubtarget &Subtarget) {
  switch (FirstOpc) {
  default:
    return false;
  case PPC::STD:
  case PPC::STFD:
  case PPC::STXSD:
  case PPC::DFSTOREf64:
    return FirstOpc == SecondOpc;
  // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
  // 32bit and 64bit instruction selection. They are clusterable pair though
  // they are different opcode.
  case PPC::STW:
  case PPC::STW8:
    return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
  }
}
bool PPCInstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
    unsigned NumBytes) const {

  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // ClusterSize means the number of memory operations that will have been
  // clustered if this hook returns true.
  // Don't cluster memory op if there are already two ops clustered at least.
  if (ClusterSize > 2)
    return false;

  // Cluster the load/store only when they have the same base
  // register or FI.
  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
    return false;

  // Check if the load/store are clusterable according to the PowerPC
  // specification.
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // Cluster the load/store only when they have the same opcode, and they are
  // clusterable opcode according to PowerPC specification.
  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
    return false;

  // Can't cluster load/store that have ordered or volatile memory reference.
  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
      !isLdStSafeToCluster(SecondLdSt, TRI))
    return false;

  int64_t Offset1 = 0, Offset2 = 0;
  LocationSize Width1 = 0, Width2 = 0;
  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
      Width1 != Width2)
    return false;

  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
         "getMemOperandWithOffsetWidth return incorrect base op");
  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + (int64_t)Width1.getValue() == Offset2;
}
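// E.g. two "std" instructions to 8(r1) and 16(r1) are clusterable: same base
// register and opcode, and Offset1 (8) plus Width1 (8 bytes) equals
// Offset2 (16).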
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be.  This returns the maximum number of bytes.
///
unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();

  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
    const MachineFunction *MF = MI.getParent()->getParent();
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
  } else if (Opcode == TargetOpcode::STACKMAP) {
    StackMapOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else if (Opcode == TargetOpcode::PATCHPOINT) {
    PatchPointOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else {
    return get(Opcode).getSize();
  }
}
std::pair<unsigned, unsigned>
PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  // PPC always uses a direct mask.
  return std::make_pair(TF, 0u);
}
ArrayRef<std::pair<unsigned, const char *>>
PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace PPCII;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PLT, "ppc-plt"},
      {MO_PIC_FLAG, "ppc-pic"},
      {MO_PCREL_FLAG, "ppc-pcrel"},
      {MO_GOT_FLAG, "ppc-got"},
      {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
      {MO_TLSGD_FLAG, "ppc-tlsgd"},
      {MO_TPREL_FLAG, "ppc-tprel"},
      {MO_TLSLDM_FLAG, "ppc-tlsldm"},
      {MO_TLSLD_FLAG, "ppc-tlsld"},
      {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
      {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
      {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
      {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
      {MO_LO, "ppc-lo"},
      {MO_HA, "ppc-ha"},
      {MO_TPREL_LO, "ppc-tprel-lo"},
      {MO_TPREL_HA, "ppc-tprel-ha"},
      {MO_DTPREL_LO, "ppc-dtprel-lo"},
      {MO_TLSLD_LO, "ppc-tlsld-lo"},
      {MO_TOC_LO, "ppc-toc-lo"},
      {MO_TLS, "ppc-tls"},
      {MO_PIC_HA_FLAG, "ppc-ha-pic"},
      {MO_PIC_LO_FLAG, "ppc-lo-pic"},
      {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
      {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
      {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
  };
  return ArrayRef(TargetFlags);
}
// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
// The VSX versions have the advantage of a full 64-register target whereas
// the FP ones have the advantage of lower latency and higher throughput. So
// what we are after is using the faster instructions in low register pressure
// situations and using the larger register file in high register pressure
// situations.
bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
  unsigned UpperOpcode, LowerOpcode;
  switch (MI.getOpcode()) {
  case PPC::DFLOADf32:
    UpperOpcode = PPC::LXSSP;
    LowerOpcode = PPC::LFS;
    break;
  case PPC::DFLOADf64:
    UpperOpcode = PPC::LXSD;
    LowerOpcode = PPC::LFD;
    break;
  case PPC::DFSTOREf32:
    UpperOpcode = PPC::STXSSP;
    LowerOpcode = PPC::STFS;
    break;
  case PPC::DFSTOREf64:
    UpperOpcode = PPC::STXSD;
    LowerOpcode = PPC::STFD;
    break;
  case PPC::XFLOADf32:
    UpperOpcode = PPC::LXSSPX;
    LowerOpcode = PPC::LFSX;
    break;
  case PPC::XFLOADf64:
    UpperOpcode = PPC::LXSDX;
    LowerOpcode = PPC::LFDX;
    break;
  case PPC::XFSTOREf32:
    UpperOpcode = PPC::STXSSPX;
    LowerOpcode = PPC::STFSX;
    break;
  case PPC::XFSTOREf64:
    UpperOpcode = PPC::STXSDX;
    LowerOpcode = PPC::STFDX;
    break;
  case PPC::LIWAX:
    UpperOpcode = PPC::LXSIWAX;
    LowerOpcode = PPC::LFIWAX;
    break;
  case PPC::LIWZX:
    UpperOpcode = PPC::LXSIWZX;
    LowerOpcode = PPC::LFIWZX;
    break;
  case PPC::STIWX:
    UpperOpcode = PPC::STXSIWX;
    LowerOpcode = PPC::STFIWX;
    break;
  default:
    llvm_unreachable("Unknown Operation!");
  }

  Register TargetReg = MI.getOperand(0).getReg();
  unsigned Opcode;
  if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
      (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
    Opcode = LowerOpcode;
  else
    Opcode = UpperOpcode;
  MI.setDesc(get(Opcode));
  return true;
}
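// E.g. a DFLOADf64 whose destination was allocated to f5 becomes lfd (lower
// latency), while one allocated to a VSX register above the FP range, such as
// vs40, becomes lxsd (larger register file).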
static bool isAnImmediateOperand(const MachineOperand &MO) {
  return MO.isCPI() || MO.isGlobal() || MO.isImm();
}
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  auto &MBB = *MI.getParent();
  auto DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  case PPC::BUILD_UACC: {
    MCRegister ACC = MI.getOperand(0).getReg();
    MCRegister UACC = MI.getOperand(1).getReg();
    if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
      MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
      MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
      // FIXME: This can easily be improved to look up to the top of the MBB
      // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
      // we can just re-target any such XXLOR's to DstVSR + offset.
      for (int VecNo = 0; VecNo < 4; VecNo++)
        BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
            .addReg(SrcVSR + VecNo)
            .addReg(SrcVSR + VecNo);
    }
    // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
    // So after building the 4 copies, we can replace the BUILD_UACC instruction
    // with a NOP.
    [[fallthrough]];
  }
  case PPC::KILL_PAIR: {
    MI.setDesc(get(PPC::UNENCODED_NOP));
    MI.removeOperand(1);
    MI.removeOperand(0);
    return true;
  }
  case TargetOpcode::LOAD_STACK_GUARD: {
    auto M = MBB.getParent()->getFunction().getParent();
    assert(
        (Subtarget.isTargetLinux() || M->getStackProtectorGuard() == "tls") &&
        "Only Linux target or tls mode are expected to contain "
        "LOAD_STACK_GUARD");
    int64_t Offset;
    if (M->getStackProtectorGuard() == "tls")
      Offset = M->getStackProtectorGuardOffset();
    else
      Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
  }
  case PPC::PPCLdFixedAddr: {
    assert(Subtarget.getTargetTriple().isOSGlibc() &&
           "Only targets with Glibc expected to contain PPCLdFixedAddr");
    int64_t Offset = 0;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(PPC::LWZ));
    uint64_t FAType = MI.getOperand(1).getImm();
#undef PPC_LNX_FEATURE
#undef PPC_LNX_CPU
#define PPC_LNX_DEFINE_OFFSETS
#include "llvm/TargetParser/PPCTargetParser.def"
    bool IsLE = Subtarget.isLittleEndian();
    bool Is64 = Subtarget.isPPC64();
    if (FAType == PPC_FAWORD_HWCAP) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_HWCAP2) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_CPUID) {
      if (IsLE)
        Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
    }
    assert(Offset && "Do not know the offset for this fixed addr load");
    MI.removeOperand(1);
    Subtarget.getTargetMachine().setGlibcHWCAPAccess();
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
#define PPC_TGT_PARSER_UNDEF_MACROS
#include "llvm/TargetParser/PPCTargetParser.def"
#undef PPC_TGT_PARSER_UNDEF_MACROS
  }
  case PPC::DFLOADf32:
  case PPC::DFLOADf64:
  case PPC::DFSTOREf32:
  case PPC::DFSTOREf64: {
    assert(Subtarget.hasP9Vector() &&
           "Invalid D-Form Pseudo-ops on Pre-P9 target.");
    assert(MI.getOperand(2).isReg() &&
           isAnImmediateOperand(MI.getOperand(1)) &&
           "D-form op must have register and immediate operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf32:
  case PPC::XFSTOREf32:
  case PPC::LIWAX:
  case PPC::LIWZX:
  case PPC::STIWX: {
    assert(Subtarget.hasP8Vector() &&
           "Invalid X-Form Pseudo-ops on Pre-P8 target.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf64:
  case PPC::XFSTOREf64: {
    assert(Subtarget.hasVSX() &&
           "Invalid X-Form Pseudo-ops on target that has no VSX.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
: {
3196 Register TargetReg
= MI
.getOperand(0).getReg();
3197 if (PPC::VSFRCRegClass
.contains(TargetReg
)) {
3198 MI
.setDesc(get(PPC::DFLOADf64
));
3199 return expandPostRAPseudo(MI
);
3202 MI
.setDesc(get(PPC::LD
));
3205 case PPC::SPILLTOVSR_ST
: {
3206 Register SrcReg
= MI
.getOperand(0).getReg();
3207 if (PPC::VSFRCRegClass
.contains(SrcReg
)) {
3208 NumStoreSPILLVSRRCAsVec
++;
3209 MI
.setDesc(get(PPC::DFSTOREf64
));
3210 return expandPostRAPseudo(MI
);
3212 NumStoreSPILLVSRRCAsGpr
++;
3213 MI
.setDesc(get(PPC::STD
));
3217 case PPC::SPILLTOVSR_LDX
: {
3218 Register TargetReg
= MI
.getOperand(0).getReg();
3219 if (PPC::VSFRCRegClass
.contains(TargetReg
))
3220 MI
.setDesc(get(PPC::LXSDX
));
3222 MI
.setDesc(get(PPC::LDX
));
3225 case PPC::SPILLTOVSR_STX
: {
3226 Register SrcReg
= MI
.getOperand(0).getReg();
3227 if (PPC::VSFRCRegClass
.contains(SrcReg
)) {
3228 NumStoreSPILLVSRRCAsVec
++;
3229 MI
.setDesc(get(PPC::STXSDX
));
3231 NumStoreSPILLVSRRCAsGpr
++;
3232 MI
.setDesc(get(PPC::STDX
));
  // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
  case PPC::CFENCE:
  case PPC::CFENCE8: {
    auto Val = MI.getOperand(0).getReg();
    unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
    BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
    BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
        .addImm(PPC::PRED_NE_MINUS)
        .addReg(PPC::CR7)
        .addImm(1);
    MI.setDesc(get(PPC::ISYNC));
    MI.removeOperand(0);
    return true;
  }
  }
  return false;
}
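// Note (illustrative): the CFENCE expansion above is the cmp/bne+isync idiom:
// comparing Val against itself creates a dependency on the loaded value, and
// the conditional branch plus isync orders later accesses against that load,
// giving an acquire-style barrier without a full sync.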
// Essentially a compile-time implementation of a compare->isel sequence.
// It takes two constants to compare, along with the true/false registers
// and the comparison type (as a subreg to a CR field) and returns one
// of the true/false registers, depending on the comparison results.
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
                          unsigned TrueReg, unsigned FalseReg,
                          unsigned CRSubReg) {
  // Signed comparisons. The immediates are assumed to be sign-extended.
  if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
    switch (CRSubReg) {
    default: llvm_unreachable("Unknown integer comparison type.");
    case PPC::sub_lt:
      return Imm1 < Imm2 ? TrueReg : FalseReg;
    case PPC::sub_gt:
      return Imm1 > Imm2 ? TrueReg : FalseReg;
    case PPC::sub_eq:
      return Imm1 == Imm2 ? TrueReg : FalseReg;
    }
  }
  // Unsigned comparisons.
  else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
    switch (CRSubReg) {
    default: llvm_unreachable("Unknown integer comparison type.");
    case PPC::sub_lt:
      return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
    case PPC::sub_gt:
      return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
    case PPC::sub_eq:
      return Imm1 == Imm2 ? TrueReg : FalseReg;
    }
  }
  return PPC::NoRegister;
}
void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
                                              unsigned OpNo,
                                              int64_t Imm) const {
  assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
  // Replace the REG with the Immediate.
  Register InUseReg = MI.getOperand(OpNo).getReg();
  MI.getOperand(OpNo).ChangeToImmediate(Imm);

  // We need to make sure that the MI no longer has any implicit use
  // of this REG. We don't call MI.implicit_operands().empty() to
  // return early, since MI's MCID might be changed in the calling context; as
  // a result its number of explicit operands may change, and thus the start of
  // the implicit operands changes as well.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
  if (UseOpIdx >= 0) {
    MachineOperand &MO = MI.getOperand(UseOpIdx);
    if (MO.isImplicit())
      // The operands must always be in the following order:
      // - explicit reg defs,
      // - other explicit operands (reg uses, immediates, etc.),
      // - implicit reg defs
      // - implicit reg uses
      // Therefore, removing the implicit operand won't change the explicit
      // operands layout.
      MI.removeOperand(UseOpIdx);
  }
}
// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
                                      const LoadImmediateInfo &LII) const {
  // Remove existing operands.
  int OperandToKeep = LII.SetCR ? 1 : 0;
  for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
    MI.removeOperand(i);

  // Replace the instruction.
  if (LII.SetCR) {
    MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
    // Set the immediate.
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
    return;
  } else
    MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));

  // Set the immediate.
  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
      .addImm(LII.Imm);
}
MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI,
                                           bool &SeenIntermediateUse) const {
  assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
         "Should be called after register allocation.");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
  It++;
  SeenIntermediateUse = false;
  for (; It != E; ++It) {
    if (It->modifiesRegister(Reg, TRI))
      return &*It;
    if (It->readsRegister(Reg, TRI))
      SeenIntermediateUse = true;
  }
  return nullptr;
}
void PPCInstrInfo::materializeImmPostRA(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, Register Reg,
                                        int64_t Imm) const {
  assert(!MBB.getParent()->getRegInfo().isSSA() &&
         "Register should be in non-SSA form after RA");
  bool isPPC64 = Subtarget.isPPC64();
  // FIXME: Materialization here is not optimal.
  // For some special bit patterns we can use fewer instructions.
  // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
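  // For example (a sketch), materializing Imm = 0x123456789ABCDEF0 with the
  // code below yields roughly:
  //   lis8 reg, 0x1234; ori8 reg, reg, 0x5678; rldicr reg, reg, 32, 31;
  //   oris8 reg, reg, 0x9ABC; ori8 reg, reg, 0xDEF0
  // (with zero halfwords the guarded ORIs are skipped).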
  if (isInt<16>(Imm)) {
    BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
  } else if (isInt<32>(Imm)) {
    BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
        .addImm(Imm >> 16);
    if (Imm & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Imm & 0xFFFF);
  } else {
    assert(isPPC64 && "Materializing 64-bit immediate to single register is "
                      "only supported in PPC64");
    BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
    if ((Imm >> 32) & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm((Imm >> 32) & 0xFFFF);
    BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(32)
        .addImm(31);
    BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm((Imm >> 16) & 0xFFFF);
    if (Imm & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Imm & 0xFFFF);
  }
}
MachineInstr *PPCInstrInfo::getForwardingDefMI(
    MachineInstr &MI,
    unsigned &OpNoForForwarding,
    bool &SeenIntermediateUse) const {
  OpNoForForwarding = ~0U;
  MachineInstr *DefMI = nullptr;
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // If we're in SSA, get the defs through the MRI. Otherwise, only look
  // within the basic block to see if the register is defined using an
  // LI/LI8/ADDI/ADDI8.
  if (MRI->isSSA()) {
    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
      if (!MI.getOperand(i).isReg())
        continue;
      Register Reg = MI.getOperand(i).getReg();
      if (!Reg.isVirtual())
        continue;
      Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
      if (TrueReg.isVirtual()) {
        MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
        if (DefMIForTrueReg->getOpcode() == PPC::LI ||
            DefMIForTrueReg->getOpcode() == PPC::LI8 ||
            DefMIForTrueReg->getOpcode() == PPC::ADDI ||
            DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
          OpNoForForwarding = i;
          DefMI = DefMIForTrueReg;
          // ADDI and LI operands may both exist in one instruction at the same
          // time. We prefer to fold the LI operand, as LI only has one Imm
          // operand and is more likely to be convertible. So if the current
          // DefMI is ADDI/ADDI8, we continue to look for a possible LI/LI8.
          if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
            break;
        }
      }
    }
  } else {
    // Looking back through the definition for each operand could be expensive,
    // so exit early if this isn't an instruction that either has an immediate
    // form or is already an immediate form that we can handle.
    ImmInstrInfo III;
    unsigned Opc = MI.getOpcode();
    bool ConvertibleImmForm =
        Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
        Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
        Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
        Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
        Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
        Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
        Opc == PPC::RLWINM8_rec;
    bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
                       ? PPC::isVFRegister(MI.getOperand(0).getReg())
                       : false;
    if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
      return nullptr;

    // Don't convert or %X, %Y, %Y since that's just a register move.
    if ((Opc == PPC::OR || Opc == PPC::OR8) &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return nullptr;
    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
      MachineOperand &MO = MI.getOperand(i);
      SeenIntermediateUse = false;
      if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
        Register Reg = MI.getOperand(i).getReg();
        // If we see another use of this reg between the def and the MI,
        // we want to flag it so the def isn't deleted.
        MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
        if (DefMI) {
          // Is this register defined by some form of add-immediate (including
          // load-immediate) within this basic block?
          switch (DefMI->getOpcode()) {
          default:
            break;
          case PPC::LI:
          case PPC::LI8:
          case PPC::ADDItocL8:
          case PPC::ADDI:
          case PPC::ADDI8:
            OpNoForForwarding = i;
            return DefMI;
          }
        }
      }
    }
  }
  return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
unsigned PPCInstrInfo::getSpillTarget() const {
  // With P10, we may need to spill paired vector registers or accumulator
  // registers. MMA implies paired vectors, so we can just check that.
  bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
  // P11 uses the P10 target.
  return Subtarget.isISAFuture()   ? 3
         : IsP10Variant            ? 2
         : Subtarget.hasP9Vector() ? 1
                                   : 0;
}

ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
  return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
}

ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
  return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
}
// This opt tries to convert the following imm form to an index form to save an
// add for stack variables.
// Return false if no such pattern found.
//
// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
// ADD instr:  ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
// Imm instr:  Reg            = op OffsetImm, ToBeDeletedReg(killed)
//
// can be converted to:
//
// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
// Index instr:    Reg            = opx ScaleReg, ToBeChangedReg(killed)
//
// In order to eliminate the ADD instr, make sure that:
// 1: (OffsetAddi + OffsetImm) must fit in int16 since this offset will be used
//    in the new ADDI instr and ADDI can only take an int16 Imm.
// 2: ToBeChangedReg must be killed in the ADD instr and there is no other use
//    between the ADDI and ADD instr, since its original def in the ADDI will
//    be changed in the new ADDI instr. There should also be no new def for it
//    between the ADD and Imm instr, as ToBeChangedReg will be used in the
//    Index instr.
// 3: ToBeDeletedReg must be killed in the Imm instr and there is no other use
//    between the ADD and Imm instr, since the ADD instr will be eliminated.
// 4: ScaleReg must not be redefined between the ADD and Imm instr, since it
//    will be moved to the Index instr.
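// For example (a sketch, with x1 as the frame base):
//   x3 = ADDI x1, 48
//   x4 = ADD  x3(killed), x5
//   x6 = LD   16, x4(killed)
// becomes:
//   x3 = ADDI x1, 64
//   x6 = LDX  x5, x3(killed)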
bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();
  // Do this opt after PEI which is after RA. The reason is stack slot expansion
  // in PEI may expose such opportunities since in PEI, stack slot offsets to
  // the frame base (OffsetAddi) are determined.
  if (!PostRA)
    return false;

  unsigned ToBeDeletedReg = 0;
  int64_t OffsetImm = 0;
  unsigned XFormOpcode = 0;
  ImmInstrInfo III;

  // Check if the Imm instr meets the requirements.
  if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
                                    III))
    return false;

  bool OtherIntermediateUse = false;
  MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);

  // Exit if there is another use between the ADD and Imm instr or no def found.
  if (OtherIntermediateUse || !ADDMI)
    return false;

  // Check if the ADD instr meets the requirements.
  if (!isADDInstrEligibleForFolding(*ADDMI))
    return false;

  unsigned ScaleRegIdx = 0;
  int64_t OffsetAddi = 0;
  MachineInstr *ADDIMI = nullptr;

  // Check if there is a valid ToBeChangedReg in ADDMI.
  // 1: It must be killed.
  // 2: Its definition must be a valid ADDIMI.
  // 3: It must satisfy the int16 offset requirement.
  if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 2;
  else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 1;
  else
    return false;

  assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
  Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
  Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
  auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
                       MachineBasicBlock::iterator End) {
    for (auto It = ++Start; It != End; It++)
      if (It->modifiesRegister(Reg, &getRegisterInfo()))
        return true;
    return false;
  };

  // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
  // treated as the special zero when ScaleReg is the R0/X0 register.
  if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
      (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
    return false;

  // Make sure there is no other def for ToBeChangedReg and ScaleReg between
  // the ADD instr and the Imm instr.
  if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
    return false;

  // Now start to do the transformation.
  LLVM_DEBUG(dbgs() << "Replace instruction: "
                    << "\n");
  LLVM_DEBUG(ADDIMI->dump());
  LLVM_DEBUG(ADDMI->dump());
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "with: "
                    << "\n");

  // Update the ADDI instr.
  ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);

  // Update the Imm instr.
  MI.setDesc(get(XFormOpcode));
  MI.getOperand(III.ImmOpNo)
      .ChangeToRegister(ScaleReg, false, false,
                        ADDMI->getOperand(ScaleRegIdx).isKill());

  MI.getOperand(III.OpNoForForwarding)
      .ChangeToRegister(ToBeChangedReg, false, false, true);

  // Eliminate the ADD instr.
  ADDMI->eraseFromParent();

  LLVM_DEBUG(ADDIMI->dump());
  LLVM_DEBUG(MI.dump());
  return true;
}
bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
                                                 int64_t &Imm) const {
  unsigned Opc = ADDIMI.getOpcode();

  // Exit if the instruction is not ADDI.
  if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
    return false;

  // The operand may not necessarily be an immediate - it could be a relocation.
  if (!ADDIMI.getOperand(2).isImm())
    return false;

  Imm = ADDIMI.getOperand(2).getImm();

  return true;
}
bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
  unsigned Opc = ADDMI.getOpcode();

  // Exit if the instruction is not ADD.
  return Opc == PPC::ADD4 || Opc == PPC::ADD8;
}
bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
                                                unsigned &ToBeDeletedReg,
                                                unsigned &XFormOpcode,
                                                int64_t &OffsetImm,
                                                ImmInstrInfo &III) const {
  // Only handle load/store.
  if (!MI.mayLoadOrStore())
    return false;

  unsigned Opc = MI.getOpcode();

  XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);

  // Exit if the instruction has no index form.
  if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
    return false;

  // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
  if (!instrHasImmForm(XFormOpcode,
                       PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
    return false;

  if (!III.IsSummingOperands)
    return false;

  MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
  MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
  // Only support imm operands, not relocation slots or others.
  if (!ImmOperand.isImm())
    return false;

  assert(RegOperand.isReg() && "Instruction format is not right");

  // If there are other uses of ToBeDeletedReg after the Imm instr, we can not
  // delete it.
  if (!RegOperand.isKill())
    return false;

  ToBeDeletedReg = RegOperand.getReg();
  OffsetImm = ImmOperand.getImm();

  return true;
}
bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
                                         MachineInstr *&ADDIMI,
                                         int64_t &OffsetAddi,
                                         int64_t OffsetImm) const {
  assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
  MachineOperand &MO = ADDMI->getOperand(Index);

  if (!MO.isKill())
    return false;

  bool OtherIntermediateUse = false;

  ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
  // Currently handle only one "add + Imminstr" pair case, exit if other
  // intermediate use for ToBeChangedReg found.
  // TODO: handle the cases where there are other "add + Imminstr" pairs
  // with same offset in Imminstr which is like:
  //
  // ADDI instr: ToBeChangedReg  = ADDI FrameBaseReg, OffsetAddi
  // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
  // Imm instr1: Reg1            = op1 OffsetImm, ToBeDeletedReg1(killed)
  // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
  // Imm instr2: Reg2            = op2 OffsetImm, ToBeDeletedReg2(killed)
  //
  // can be converted to:
  //
  // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
  //                                       (OffsetAddi + OffsetImm)
  // Index instr1:   Reg1           = opx1 ScaleReg1, ToBeChangedReg
  // Index instr2:   Reg2           = opx2 ScaleReg2, ToBeChangedReg(killed)
  if (OtherIntermediateUse || !ADDIMI)
    return false;
  // Check if the ADDI instr meets the requirements.
  if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
    return false;

  if (isInt<16>(OffsetAddi + OffsetImm))
    return true;
  return false;
}
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
                                          SmallSet<Register, 4> &RegsToUpdate,
                                          MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();
  bool SeenIntermediateUse = true;
  unsigned ForwardingOperand = ~0U;
  MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
                                           SeenIntermediateUse);
  if (!DefMI)
    return false;
  assert(ForwardingOperand < MI.getNumOperands() &&
         "The forwarding operand needs to be valid at this point");
  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  if (KilledDef && KillFwdDefMI)
    *KilledDef = DefMI;

  // Conservatively add defs from DefMI and defs/uses from MI to the set of
  // registers that need their kill flags updated.
  for (const MachineOperand &MO : DefMI->operands())
    if (MO.isReg() && MO.isDef())
      RegsToUpdate.insert(MO.getReg());
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg())
      RegsToUpdate.insert(MO.getReg());

  // If this is an imm-form instruction whose register operand is produced by
  // an ADDI, fold the imm into the instruction directly.
  if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
          PPC::INSTRUCTION_LIST_END &&
      transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
    return true;

  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? PPC::isVFRegister(MI.getOperand(0).getReg())
                     : false;
  bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by an add-immediate,
  // try to convert it.
  if (HasImmForm &&
      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
                                 KillFwdDefMI))
    return true;

  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by LI, convert it now.
  if (HasImmForm &&
      transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
    return true;

  // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
  // can be simplified to LI.
  if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
    return true;

  return false;
}
bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
                                 MachineInstr **ToErase) const {
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  Register FoldingReg = MI.getOperand(1).getReg();
  if (!FoldingReg.isVirtual())
    return false;
  MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
  if (SrcMI->getOpcode() != PPC::RLWINM &&
      SrcMI->getOpcode() != PPC::RLWINM_rec &&
      SrcMI->getOpcode() != PPC::RLWINM8 &&
      SrcMI->getOpcode() != PPC::RLWINM8_rec)
    return false;
  assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
          MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
          SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
         "Invalid PPC::RLWINM Instruction!");
  uint64_t SHSrc = SrcMI->getOperand(2).getImm();
  uint64_t SHMI = MI.getOperand(2).getImm();
  uint64_t MBSrc = SrcMI->getOperand(3).getImm();
  uint64_t MBMI = MI.getOperand(3).getImm();
  uint64_t MESrc = SrcMI->getOperand(4).getImm();
  uint64_t MEMI = MI.getOperand(4).getImm();

  assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
         "Invalid PPC::RLWINM Instruction!");
  // If MBMI is bigger than MEMI, we can never get a run of ones.
  // RotatedSrcMask non-wrap:
  //                 0........31|32........63
  // RotatedSrcMask:   B---E        B---E
  // MaskMI:         -----------|--E  B------
  // Result:           -----          ---      (Bad candidate)
  //
  // RotatedSrcMask wrap:
  //                 0........31|32........63
  // RotatedSrcMask: --E   B----|--E    B----
  // MaskMI:         -----------|--E  B------
  // Result:         ---   -----|---   -----   (Bad candidate)
  //
  // One special case is when RotatedSrcMask is a full set mask.
  // RotatedSrcMask full:
  //                 0........31|32........63
  // RotatedSrcMask: ------EB---|-------EB---
  // MaskMI:         -----------|--E  B------
  // Result:         -----------|---  -------  (Good candidate)
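  //
  // For example (a sketch): %1 = RLWINM %0, 4, 0, 27 (slwi 4) feeding
  // %2 = RLWINM %1, 28, 4, 31 (srwi 4) folds to %2 = RLWINM %0, 0, 4, 31
  // (clrlwi 4), since NewSH = (4 + 28) % 32 = 0 and the final mask is a
  // run of ones with MB = 4, ME = 31.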
  // Mark the special case.
  bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);

  // For other MBMI > MEMI cases, just return.
  if ((MBMI > MEMI) && !SrcMaskFull)
    return false;

  // Handle MBMI <= MEMI cases.
  APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
  // In MI, we only need the low 32 bits of SrcMI, so just consider the low 32
  // bits of the SrcMI mask. Note that in APInt the lowest bit is at index 0,
  // while in the PowerPC ISA the lowest bit is at index 63.
  APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);

  APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
  APInt FinalMask = RotatedSrcMask & MaskMI;
  uint32_t NewMB, NewME;
  bool Simplified = false;

  // If the final mask is 0, the MI result should be 0 too.
  if (FinalMask.isZero()) {
    bool Is64Bit =
        (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
    Simplified = true;
    LLVM_DEBUG(dbgs() << "Replace Instr: ");
    LLVM_DEBUG(MI.dump());

    if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
      // Replace MI with "LI 0"
      MI.removeOperand(4);
      MI.removeOperand(3);
      MI.removeOperand(2);
      MI.getOperand(1).ChangeToImmediate(0);
      MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
    } else {
      // Replace MI with "ANDI_rec reg, 0"
      MI.removeOperand(4);
      MI.removeOperand(3);
      MI.getOperand(2).setImm(0);
      MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
      MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
      if (SrcMI->getOperand(1).isKill()) {
        MI.getOperand(1).setIsKill(true);
        SrcMI->getOperand(1).setIsKill(false);
      } else
        // About to replace MI.getOperand(1), clear its kill flag.
        MI.getOperand(1).setIsKill(false);
    }

    LLVM_DEBUG(dbgs() << "With: ");
    LLVM_DEBUG(MI.dump());

  } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
              NewMB <= NewME) ||
             SrcMaskFull) {
    // Here we only handle the MBMI <= MEMI case, so NewMB must be no bigger
    // than NewME. Otherwise we would get a 64-bit value after folding, but MI
    // returns a 32-bit value.
    Simplified = true;
    LLVM_DEBUG(dbgs() << "Converting Instr: ");
    LLVM_DEBUG(MI.dump());

    uint16_t NewSH = (SHSrc + SHMI) % 32;
    MI.getOperand(2).setImm(NewSH);
    // If the SrcMI mask is full, there is no need to update MBMI and MEMI.
    if (!SrcMaskFull) {
      MI.getOperand(3).setImm(NewMB);
      MI.getOperand(4).setImm(NewME);
    }
    MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
    if (SrcMI->getOperand(1).isKill()) {
      MI.getOperand(1).setIsKill(true);
      SrcMI->getOperand(1).setIsKill(false);
    } else
      // About to replace MI.getOperand(1), clear its kill flag.
      MI.getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "To: ");
    LLVM_DEBUG(MI.dump());
  }
  if (Simplified && MRI->use_nodbg_empty(FoldingReg) &&
      !SrcMI->hasImplicitDef()) {
    // If FoldingReg has no non-debug use and it has no implicit def (it
    // is not RLWINM_rec or RLWINM8_rec), it's safe to delete its def SrcMI.
    // Otherwise keep it.
    *ToErase = SrcMI;
    LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
    LLVM_DEBUG(SrcMI->dump());
  }
  return Simplified;
}
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
                                   ImmInstrInfo &III, bool PostRA) const {
  // The vast majority of the instructions would need their operand 2 replaced
  // with an immediate when switching to the reg+imm form. A marked exception
  // are the update form loads/stores for which a constant operand 2 would need
  // to turn into a displacement and move operand 1 to the operand 2 position.
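  // For example, ADD4 dst, rA, rB can become ADDI dst, rA, imm once rB is
  // known to be a 16-bit constant.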
  III.ImmOpNo = 2;
  III.OpNoForForwarding = 2;
  III.ImmWidth = 16;
  III.ImmMustBeMultipleOf = 1;
  III.TruncateImmTo = 0;
  III.IsSummingOperands = false;
  switch (Opc) {
  default: return false;
  case PPC::ADD4:
  case PPC::ADD8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 1;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
    break;
  case PPC::ADDC:
  case PPC::ADDC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
    break;
  case PPC::ADDC_rec:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = PPC::ADDIC_rec;
    break;
  case PPC::SUBFC:
  case PPC::SUBFC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
    break;
  case PPC::CMPW:
  case PPC::CMPD:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
    break;
  case PPC::CMPLW:
  case PPC::CMPLD:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
    break;
  case PPC::AND_rec:
  case PPC::AND8_rec:
  case PPC::OR:
  case PPC::OR8:
  case PPC::XOR:
  case PPC::XOR8:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    switch (Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::AND_rec:
      III.ImmOpcode = PPC::ANDI_rec;
      break;
    case PPC::AND8_rec:
      III.ImmOpcode = PPC::ANDI8_rec;
      break;
    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
    }
    break;
  case PPC::RLWNM:
  case PPC::RLWNM8:
  case PPC::RLWNM_rec:
  case PPC::RLWNM8_rec:
  case PPC::SLW:
  case PPC::SLW8:
  case PPC::SLW_rec:
  case PPC::SLW8_rec:
  case PPC::SRW:
  case PPC::SRW8:
  case PPC::SRW_rec:
  case PPC::SRW8_rec:
  case PPC::SRAW:
  case PPC::SRAW_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
        Opc == PPC::RLWNM8_rec)
      III.TruncateImmTo = 5;
    else
      III.TruncateImmTo = 6;
    switch (Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::RLWNM_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::RLWNM8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SLW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SLW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SRW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SRW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRAW:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI;
      break;
    case PPC::SRAW_rec:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI_rec;
      break;
    }
    break;
  case PPC::RLDCL:
  case PPC::RLDCL_rec:
  case PPC::RLDCR:
  case PPC::RLDCR_rec:
  case PPC::SLD:
  case PPC::SLD_rec:
  case PPC::SRD:
  case PPC::SRD_rec:
  case PPC::SRAD:
  case PPC::SRAD_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
        Opc == PPC::RLDCR_rec)
      III.TruncateImmTo = 6;
    else
      III.TruncateImmTo = 7;
    switch (Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::RLDCL_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::RLDCR_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::SLD_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::SRD_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::SRAD:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI;
      break;
    case PPC::SRAD_rec:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI_rec;
      break;
    }
    break;
  // Loads and stores:
  case PPC::LBZX:
  case PPC::LBZX8:
  case PPC::LHZX:
  case PPC::LHZX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWZX:
  case PPC::LWZX8:
  case PPC::LWAX:
  case PPC::LDX:
  case PPC::LFSX:
  case PPC::LFDX:
  case PPC::STBX:
  case PPC::STBX8:
  case PPC::STHX:
  case PPC::STHX8:
  case PPC::STWX:
  case PPC::STWX8:
  case PPC::STDX:
  case PPC::STFSX:
  case PPC::STFDX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    switch (Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
    case PPC::LWAX:
      III.ImmOpcode = PPC::LWA;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
    case PPC::STDX:
      III.ImmOpcode = PPC::STD;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
    }
    break;
  case PPC::LBZUX:
  case PPC::LBZUX8:
  case PPC::LHZUX:
  case PPC::LHZUX8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LWZUX:
  case PPC::LWZUX8:
  case PPC::LDUX:
  case PPC::LFSUX:
  case PPC::LFDUX:
  case PPC::STBUX:
  case PPC::STBUX8:
  case PPC::STHUX:
  case PPC::STHUX8:
  case PPC::STWUX:
  case PPC::STWUX8:
  case PPC::STDUX:
  case PPC::STFSUX:
  case PPC::STFDUX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 2;
    III.ZeroIsSpecialNew = 3;
    III.IsCommutative = false;
    III.IsSummingOperands = true;
    III.ImmOpNo = 2;
    III.OpNoForForwarding = 3;
    switch (Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
    case PPC::LDUX:
      III.ImmOpcode = PPC::LDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
    case PPC::STDUX:
      III.ImmOpcode = PPC::STDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
    }
    break;
  // Power9 and up only. For some of these, the X-Form version has access to all
  // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
  // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
  // into or stored from is one of the VR registers.
  case PPC::LXVX:
  case PPC::LXSSPX:
  case PPC::LXSDX:
  case PPC::STXVX:
  case PPC::STXSSPX:
  case PPC::STXSDX:
  case PPC::XFLOADf32:
  case PPC::XFLOADf64:
  case PPC::XFSTOREf32:
  case PPC::XFSTOREf64:
    if (!Subtarget.hasP9Vector())
      return false;
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    III.ImmMustBeMultipleOf = 4;
    switch (Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LXVX:
      III.ImmOpcode = PPC::LXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::LXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSSP;
        else {
          III.ImmOpcode = PPC::LFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf32:
      III.ImmOpcode = PPC::DFLOADf32;
      break;
    case PPC::LXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSD;
        else {
          III.ImmOpcode = PPC::LFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf64:
      III.ImmOpcode = PPC::DFLOADf64;
      break;
    case PPC::STXVX:
      III.ImmOpcode = PPC::STXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::STXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSSP;
        else {
          III.ImmOpcode = PPC::STFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf32:
      III.ImmOpcode = PPC::DFSTOREf32;
      break;
    case PPC::STXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSD;
        else {
          III.ImmOpcode = PPC::STFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf64:
      III.ImmOpcode = PPC::DFSTOREf64;
      break;
    }
    break;
  }
  return true;
}
// Utility function for swapping two arbitrary operands of an instruction.
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
  assert(Op1 != Op2 && "Cannot swap operand with itself.");

  unsigned MaxOp = std::max(Op1, Op2);
  unsigned MinOp = std::min(Op1, Op2);
  MachineOperand MOp1 = MI.getOperand(MinOp);
  MachineOperand MOp2 = MI.getOperand(MaxOp);
  MI.removeOperand(std::max(Op1, Op2));
  MI.removeOperand(std::min(Op1, Op2));

  // If the operands we are swapping are the two at the end (the common case)
  // we can just remove both and add them in the opposite order.
  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
    MI.addOperand(MOp2);
    MI.addOperand(MOp1);
  } else {
    // Store all operands in a temporary vector, remove them and re-add in the
    // right order.
    SmallVector<MachineOperand, 2> MOps;
    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }
    // MOp2 needs to be added next.
    MI.addOperand(MOp2);
    // Now add the rest.
    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
      if (i == MaxOp)
        MI.addOperand(MOp1);
      else {
        MI.addOperand(MOps.back());
        MOps.pop_back();
      }
    }
  }
}
// Check if the 'MI' that has the index OpNoForForwarding
// meets the requirement described in the ImmInstrInfo.
bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
                                               const ImmInstrInfo &III,
                                               unsigned OpNoForForwarding
                                               ) const {
  // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
  // would not work pre-RA, we can only do the check post RA.
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  // Cannot do the transform if MI isn't summing the operands.
  if (!III.IsSummingOperands)
    return false;

  // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
  if (!III.ZeroIsSpecialOrig)
    return false;

  // We cannot do the transform if the operand we are trying to replace
  // isn't the same as the operand the instruction allows.
  if (OpNoForForwarding != III.OpNoForForwarding)
    return false;

  // Check if the instruction we are trying to transform really has
  // the special zero register as its operand.
  if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
      MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
    return false;

  // This machine instruction is convertible if it is,
  // 1. summing the operands.
  // 2. one of the operands is special zero register.
  // 3. the operand we are trying to replace is allowed by the MI.
  return true;
}
// Check if the DefMI is the add inst and set the ImmMO and RegMO
// accordingly.
bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
                                               const ImmInstrInfo &III,
                                               MachineOperand *&ImmMO,
                                               MachineOperand *&RegMO) const {
  unsigned Opc = DefMI.getOpcode();
  if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
    return false;

  // Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL8
  // on AIX which is used for toc-data access. TODO: Follow up to see if it can
  // apply for AIX toc-data as well.
  if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
    return false;

  assert(DefMI.getNumOperands() >= 3 &&
         "Add inst must have at least three operands");
  RegMO = &DefMI.getOperand(1);
  ImmMO = &DefMI.getOperand(2);

  // Before RA, the ADDI's first operand could be a frame index.
  if (!RegMO->isReg())
    return false;

  // This DefMI is eligible for forwarding if it is:
  // 1. an add inst
  // 2. one of the operands is Imm/CPI/Global.
  return isAnImmediateOperand(*ImmMO);
}
bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x   ->  z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walk the insts in reverse (MI --> DefMI) to get the last DEF of the Reg.
  MachineBasicBlock::const_reverse_iterator It = MI;
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  It++;
  for (; It != E; ++It) {
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      IsFwdFeederRegKilled = true;
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL8) {
    // The operand for ADDItocL8 is a CPI, which isn't an imm at compile time.
    // However, we know that it is 16 bits wide and has an alignment of 4.
    // Check if the instruction meets the requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
        III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load. A DForm load cannot be represented if it is a multiple of say 2.
    // XForm loads do not have this restriction.
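    // For example, an LDX can only be rewritten as LD (a DS-form load whose
    // displacement must be a multiple of 4) when that alignment is provable.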
    if (ImmMO.isGlobal()) {
      const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
      if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf)
        return false;
    }

    return true;
  }

  if (ImmMO.isImm()) {
    // It is an Imm, we need to check if the Imm fits the range.
    // Sign-extend to 64-bits.
    // DefMI may be folded with another imm-form instruction; the resulting Imm
    // is the sum of the Imm of DefMI and BaseImm, which comes from the
    // imm-form instruction.
    APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
    if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
      return false;
    if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
      return false;
    Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);

    if (Imm % III.ImmMustBeMultipleOf)
      return false;
    if (III.TruncateImmTo)
      Imm &= ((1 << III.TruncateImmTo) - 1);
  }
  else
    return false;

  // This ImmMO is forwarded if it meets the requirement described
  // in the ImmInstrInfo.
  return true;
}
bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
                                unsigned OpNoForForwarding,
                                MachineInstr **KilledDef) const {
  if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
      !DefMI.getOperand(1).isImm())
    return false;

  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();

  int64_t Immediate = DefMI.getOperand(1).getImm();
  // Sign-extend to 64-bits.
  int64_t SExtImm = SignExtend64<16>(Immediate);

  bool ReplaceWithLI = false;
  bool Is64BitLI = false;
  int64_t NewImm = 0;
  bool SetCR = false;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;

  // FIXME: Any branches conditional on such a comparison can be made
  // unconditional. At this time, this happens too infrequently to be worth
  // the implementation effort, but if that ever changes, we could convert
  // such a pattern here.
  case PPC::CMPWI:
  case PPC::CMPLWI:
  case PPC::CMPDI:
  case PPC::CMPLDI: {
    // Doing this post-RA would require dataflow analysis to reliably find uses
    // of the CR register set by the compare.
    // No need to fixup killed/dead flags since this transformation is only
    // valid pre-RA.
    if (PostRA)
      return false;
    // If a compare-immediate is fed by an immediate and is itself an input of
    // an ISEL (the most common case), convert the ISEL into a COPY of the
    // correct register.
    bool Changed = false;
    Register DefReg = MI.getOperand(0).getReg();
    int64_t Comparand = MI.getOperand(2).getImm();
    int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
                                ? (Comparand | 0xFFFFFFFFFFFF0000)
                                : Comparand;

    for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
      unsigned UseOpc = CompareUseMI.getOpcode();
      if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
        continue;
      unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
      Register TrueReg = CompareUseMI.getOperand(1).getReg();
      Register FalseReg = CompareUseMI.getOperand(2).getReg();
      unsigned RegToCopy =
          selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
      if (RegToCopy == PPC::NoRegister)
        continue;
      // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
      if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
        CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
        replaceInstrOperandWithImm(CompareUseMI, 1, 0);
        CompareUseMI.removeOperand(3);
        CompareUseMI.removeOperand(2);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
      LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
      LLVM_DEBUG(dbgs() << "Is converted to:\n");
      // Convert to copy and remove unneeded operands.
      CompareUseMI.setDesc(get(PPC::COPY));
      CompareUseMI.removeOperand(3);
      CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
      CmpIselsConverted++;
      Changed = true;
      LLVM_DEBUG(CompareUseMI.dump());
    }
    if (Changed)
      return true;
    // This may end up incremented multiple times since this function is called
    // during a fixed-point transformation, but it is only meant to indicate the
    // presence of this opportunity.
    MissedConvertibleImmediateInstrs++;
    return false;
  }

  // Immediate forms - may simply be convertible to an LI.
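  // For example, x = LI 2000 feeding y = ADDI x, 17 folds to y = LI 2017
  // (assuming the sum fits in a signed 16-bit immediate).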
  case PPC::ADDI:
  case PPC::ADDI8: {
    // Does the sum fit in a 16-bit signed field?
    int64_t Addend = MI.getOperand(2).getImm();
    if (isInt<16>(Addend + SExtImm)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::ADDI8;
      NewImm = Addend + SExtImm;
      break;
    }
    return false;
  }
  case PPC::SUBFIC:
  case PPC::SUBFIC8: {
    // Only transform this if the CARRY implicit operand is dead.
    if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
      return false;
    int64_t Minuend = MI.getOperand(2).getImm();
    if (isInt<16>(Minuend - SExtImm)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::SUBFIC8;
      NewImm = Minuend - SExtImm;
      break;
    }
    return false;
  }
  case PPC::RLDICL:
  case PPC::RLDICL_rec:
  case PPC::RLDICL_32:
  case PPC::RLDICL_32_64: {
    // Use APInt's rotate function.
    int64_t SH = MI.getOperand(2).getImm();
    int64_t MB = MI.getOperand(3).getImm();
    APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
                SExtImm, true);
    InVal = InVal.rotl(SH);
    uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
    InVal &= Mask;
    // Can't replace negative values with an LI as that will sign-extend
    // and not clear the left bits. If we're setting the CR bit, we will use
    // ANDI_rec which won't sign extend, so that's safe.
    if (isUInt<15>(InVal.getSExtValue()) ||
        (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
      ReplaceWithLI = true;
      Is64BitLI = Opc != PPC::RLDICL_32;
      NewImm = InVal.getSExtValue();
      SetCR = Opc == PPC::RLDICL_rec;
      break;
    }
    return false;
  }
  case PPC::RLWINM:
  case PPC::RLWINM8:
  case PPC::RLWINM_rec:
  case PPC::RLWINM8_rec: {
    int64_t SH = MI.getOperand(2).getImm();
    int64_t MB = MI.getOperand(3).getImm();
    int64_t ME = MI.getOperand(4).getImm();
    APInt InVal(32, SExtImm, true);
    InVal = InVal.rotl(SH);
    APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
    InVal &= Mask;
    // Can't replace negative values with an LI as that will sign-extend
    // and not clear the left bits. If we're setting the CR bit, we will use
    // ANDI_rec which won't sign extend, so that's safe.
    bool ValueFits = isUInt<15>(InVal.getSExtValue());
    ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
                  isUInt<16>(InVal.getSExtValue()));
    if (ValueFits) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
      NewImm = InVal.getSExtValue();
      SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
      break;
    }
    return false;
  }
  case PPC::ORI:
  case PPC::ORI8:
  case PPC::XORI:
  case PPC::XORI8: {
    int64_t LogicalImm = MI.getOperand(2).getImm();
    int64_t Result = 0;
    if (Opc == PPC::ORI || Opc == PPC::ORI8)
      Result = LogicalImm | SExtImm;
    else
      Result = LogicalImm ^ SExtImm;
    if (isInt<16>(Result)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
      NewImm = Result;
      break;
    }
    return false;
  }
  }

  if (ReplaceWithLI) {
    // We need to be careful with CR-setting instructions we're replacing.
    if (SetCR) {
      // We don't know anything about uses when we're out of SSA, so only
      // replace if the new immediate will be reproduced.
      bool ImmChanged = (SExtImm & NewImm) != NewImm;
      if (PostRA && ImmChanged)
        return false;

      if (!PostRA) {
        // If the defining load-immediate has no other uses, we can just replace
        // the immediate with the new immediate.
        if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
          DefMI.getOperand(1).setImm(NewImm);

        // If we're not using the GPR result of the CR-setting instruction, we
        // just need to and with zero/non-zero depending on the new immediate.
        else if (MRI->use_empty(MI.getOperand(0).getReg())) {
          if (NewImm) {
            assert(Immediate && "Transformation converted zero to non-zero?");
            NewImm = Immediate;
          }
        } else if (ImmChanged)
          return false;
      }
    }

    LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
    LLVM_DEBUG(MI.dump());
    LLVM_DEBUG(dbgs() << "Fed by:\n");
    LLVM_DEBUG(DefMI.dump());
    LoadImmediateInfo LII;
    LII.Imm = NewImm;
    LII.Is64Bit = Is64BitLI;
    LII.SetCR = SetCR;
    // If we're setting the CR, the original load-immediate must be kept (as an
    // operand to ANDI_rec/ANDI8_rec).
    if (KilledDef && SetCR)
      *KilledDef = nullptr;
    replaceInstrWithLI(MI, LII);

    if (PostRA)
      recomputeLivenessFlags(*MI.getParent());

    LLVM_DEBUG(dbgs() << "With:\n");
    LLVM_DEBUG(MI.dump());
    return true;
  }
  return false;
}
bool PPCInstrInfo::transformToNewImmFormFedByAdd(
    MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  bool PostRA = !MRI->isSSA();
  // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
  // for post-ra.
  if (PostRA)
    return false;

  // Only handle load/store.
  if (!MI.mayLoadOrStore())
    return false;

  unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());

  assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
         "MI must have x-form opcode");

  // get Imm Form info.
  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? PPC::isVFRegister(MI.getOperand(0).getReg())
                     : false;

  if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
    return false;

  if (!III.IsSummingOperands)
    return false;

  if (OpNoForForwarding != III.OpNoForForwarding)
    return false;

  MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
  if (!ImmOperandMI.isImm())
    return false;

  MachineOperand *ImmMO = nullptr;
  MachineOperand *RegMO = nullptr;
  if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
    return false;
  assert(ImmMO && RegMO && "Imm and Reg operand must have been set");

  // Set ImmBase from imm instruction as base and get new Imm inside
  // isImmElgibleForForwarding.
  int64_t ImmBase = ImmOperandMI.getImm();
  int64_t Imm = 0;
  if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
    return false;

  LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "Fed by:\n");
  LLVM_DEBUG(DefMI.dump());

  MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
  MI.getOperand(III.ImmOpNo).setImm(Imm);

  LLVM_DEBUG(dbgs() << "With:\n");
  LLVM_DEBUG(MI.dump());
  return true;
}
// If an X-Form instruction is fed by an add-immediate and one of its operands
// is the literal zero, attempt to forward the source of the add-immediate to
// the corresponding D-Form instruction with the displacement coming from
// the immediate being added.
bool PPCInstrInfo::transformToImmFormFedByAdd(
    MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
    MachineInstr &DefMI, bool KillDefMI) const {
  //         RegMO ImmMO
  //           |    |
  // x = addi reg, imm  <----- DefMI
  // y = op    0 ,  x   <----- MI
  //                |
  //         OpNoForForwarding
  // Check if the MI meets the requirements described in the III.
  if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
    return false;

  // Check if the DefMI meets the requirements
  // described in the III. If yes, set the ImmMO and RegMO accordingly.
  MachineOperand *ImmMO = nullptr;
  MachineOperand *RegMO = nullptr;
  if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
    return false;
  assert(ImmMO && RegMO && "Imm and Reg operand must have been set");

  // Now that we have the Imm operand, check if the ImmMO meets
  // the requirements described in the III. If yes, set the Imm.
  int64_t Imm = 0;
  if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
    return false;

  bool IsFwdFeederRegKilled = false;
  bool SeenIntermediateUse = false;
  // Check if the RegMO can be forwarded to MI.
  if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
                                 IsFwdFeederRegKilled, SeenIntermediateUse))
    return false;

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  bool PostRA = !MRI.isSSA();

  // We know that the MI and DefMI both match the pattern, and
  // the Imm also meets the requirements of the new Imm-form.
  // It is safe to do the transformation now.
  LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "Fed by:\n");
  LLVM_DEBUG(DefMI.dump());

  // Update the base reg first.
  MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
                                                        false, false,
                                                        RegMO->isKill());

  // Then, update the imm.
  if (ImmMO->isImm()) {
    // If the ImmMO is an Imm, change the operand that has ZERO to that Imm
    // directly.
    replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm);
  }
  else {
    // Otherwise, it is a Constant Pool Index (CPI) or a Global,
    // which is in fact a relocation. We need to replace the special zero
    // register with ImmMO.
    // Before that, we need to fix up the target flags for the imm.
    // For some reason, we fail to set the flag for the ImmMO if it is a CPI.
    if (DefMI.getOpcode() == PPC::ADDItocL8)
      ImmMO->setTargetFlags(PPCII::MO_TOC_LO);

    // MachineInstr has no MI.setOperand(i) interface, though it has
    // MI.getOperand(i). To replace the ZERO MachineOperand with
    // ImmMO, we need to remove the ZERO operand and all the operands behind
    // it, add the ImmMO, and then move back all the operands behind ZERO.
    SmallVector<MachineOperand, 2> MOps;
    for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }

    // Remove the last MO in the list, which is the ZERO operand in fact.
    MOps.pop_back();
    // Add the imm operand.
    MI.addOperand(*ImmMO);
    // Now add the rest back.
    for (auto &MO : MOps)
      MI.addOperand(MO);
  }

  // Update the opcode.
  MI.setDesc(get(III.ImmOpcode));

  if (PostRA)
    recomputeLivenessFlags(*MI.getParent());
  LLVM_DEBUG(dbgs() << "With:\n");
  LLVM_DEBUG(MI.dump());

  return true;
}
bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
                                             const ImmInstrInfo &III,
                                             unsigned ConstantOpNo,
                                             MachineInstr &DefMI) const {
  // DefMI must be LI or LI8.
  if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
      !DefMI.getOperand(1).isImm())
    return false;

  // Get the Imm operand and sign-extend to 64-bits.
  int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  bool PostRA = !MRI.isSSA();
  // Exit early if we can't convert this.
  if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
    return false;
  if (Imm % III.ImmMustBeMultipleOf)
    return false;
  if (III.TruncateImmTo)
    Imm &= ((1 << III.TruncateImmTo) - 1);
  if (III.SignedImm) {
    APInt ActualValue(64, Imm, true);
    if (!ActualValue.isSignedIntN(III.ImmWidth))
      return false;
  } else {
    uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
    if ((uint64_t)Imm > UnsignedMax)
      return false;
  }

  // If we're post-RA and the instructions don't agree on whether register zero
  // is special, we can transform this as long as the register operand that
  // will end up in the location where zero is special isn't R0.
  if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
    unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
                              III.ZeroIsSpecialNew + 1;
    Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
    Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
    // If R0 is in the operand where zero is special for the new instruction,
    // it is unsafe to transform if the constant operand isn't that operand.
    if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
        ConstantOpNo != III.ZeroIsSpecialNew)
      return false;
    if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
        ConstantOpNo != PosForOrigZero)
      return false;
  }

  unsigned Opc = MI.getOpcode();
  bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
                        Opc == PPC::SRW || Opc == PPC::SRW_rec ||
                        Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
                        Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
  bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
                        Opc == PPC::SRD || Opc == PPC::SRD_rec;
  bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
               Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
  bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
                    Opc == PPC::SRD_rec;

  LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
  LLVM_DEBUG(DefMI.dump());
  MI.setDesc(get(III.ImmOpcode));
  if (ConstantOpNo == III.OpNoForForwarding) {
    // Converting shifts to immediate form is a bit tricky since they may do
    // one of three things:
    // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
    // 2. If the shift amount is zero, the result is unchanged (save for maybe
    //    setting CR0)
    // 3. If the shift amount is in [1, OpSize), it's just a shift
    if (SpecialShift32 || SpecialShift64) {
      LoadImmediateInfo LII;
      LII.Imm = 0;
      LII.SetCR = SetCR;
      LII.Is64Bit = SpecialShift64;
      uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
      if (Imm & (SpecialShift32 ? 0x20 : 0x40))
        replaceInstrWithLI(MI, LII);
      // Shifts by zero don't change the value. If we don't need to set CR0,
      // just convert this to a COPY. Can't do this post-RA since we've already
      // cleaned up the copies.
      else if (!SetCR && ShAmt == 0 && !PostRA) {
        MI.removeOperand(2);
        MI.setDesc(get(PPC::COPY));
      } else {
        // The 32 bit and 64 bit instructions are quite different.
        if (SpecialShift32) {
          // Left shifts use (N, 0, 31-N).
          // Right shifts use (32-N, N, 31) if 0 < N < 32.
          //              use (0, 0, 31)    if N == 0.
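          // For example, a 32-bit left shift by 3 becomes RLWINM with
          // (SH, MB, ME) = (3, 0, 28).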
5056 uint64_t SH
= ShAmt
== 0 ? 0 : RightShift
? 32 - ShAmt
: ShAmt
;
5057 uint64_t MB
= RightShift
? ShAmt
: 0;
5058 uint64_t ME
= RightShift
? 31 : 31 - ShAmt
;
5059 replaceInstrOperandWithImm(MI
, III
.OpNoForForwarding
, SH
);
5060 MachineInstrBuilder(*MI
.getParent()->getParent(), MI
).addImm(MB
)
5063 // Left shifts use (N, 63-N).
5064 // Right shifts use (64-N, N) if 0 < N < 64.
5065 // use (0, 0) if N == 0.
5066 uint64_t SH
= ShAmt
== 0 ? 0 : RightShift
? 64 - ShAmt
: ShAmt
;
5067 uint64_t ME
= RightShift
? ShAmt
: 63 - ShAmt
;
5068 replaceInstrOperandWithImm(MI
, III
.OpNoForForwarding
, SH
);
5069 MachineInstrBuilder(*MI
.getParent()->getParent(), MI
).addImm(ME
);
5073 replaceInstrOperandWithImm(MI
, ConstantOpNo
, Imm
);
  // Convert commutative instructions (switch the operands and convert the
  // desired one to an immediate).
  else if (III.IsCommutative) {
    replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
    swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
  } else
    llvm_unreachable("Should have exited early!");
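  // Worked example for the shift conversion above (added, illustrative):
  // SRW with a forwarded shift amount of 5 becomes RLWINM with
  // (SH, MB, ME) = (32-5, 5, 31) = (27, 5, 31), i.e. rotate left by 27 and
  // keep bits 5..31, which equals a logical right shift by 5.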
  // For instructions for which the constant register replaces a different
  // operand than where the immediate goes, we need to swap them.
  if (III.OpNoForForwarding != III.ImmOpNo)
    swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo);
  // If the special R0/X0 register indexes are different for the original
  // instruction and the new instruction, we need to fix up the register class
  // in the new instruction.
  if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
    if (III.ZeroIsSpecialNew) {
      // If operand at III.ZeroIsSpecialNew is a physical reg (e.g. ZERO/ZERO8),
      // no need to fix up register class.
      Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
      if (RegToModify.isVirtual()) {
        const TargetRegisterClass *NewRC =
          MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
          &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
        MRI.setRegClass(RegToModify, NewRC);
      }
    }
  }

  if (PostRA)
    recomputeLivenessFlags(*MI.getParent());

  LLVM_DEBUG(dbgs() << "With: ");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "\n");

  return true;
}

const TargetRegisterClass *
PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
  if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
    return &PPC::VSRCRegClass;
  return RC;
}

int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
  return PPC::getRecordFormOpcode(Opcode);
}

static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
  return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
          Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
          Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
          Opcode == PPC::LHZUX8);
}

// This function checks for sign extension from 32 bits to 64 bits.
static bool definedBySignExtendingOp(const unsigned Reg,
                                     const MachineRegisterInfo *MRI) {
  if (!Register::isVirtualRegister(Reg))
    return false;

  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return false;

  int Opcode = MI->getOpcode();
  const PPCInstrInfo *TII =
      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
  if (TII->isSExt32To64(Opcode))
    return true;

  // The first def of LBZU/LHZU is sign extended.
  if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
    return true;

  // RLDICL generates sign-extended output if it clears at least
  // 33 bits from the left (MSB).
  if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
    return true;

  // If at least one bit from the left in the lower word is masked out,
  // bits 0 through 32 of the output are all cleared.
  // Hence the output is already sign extended.
  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
      MI->getOperand(3).getImm() > 0 &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return true;

  // If the most significant bit of the immediate in ANDIS is zero,
  // bits 0 through 32 of the output are all cleared.
  if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
    uint16_t Imm = MI->getOperand(2).getImm();
    if ((Imm & 0x8000) == 0)
      return true;
  }

  return false;
}
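// Illustrative note (added): in IBM bit numbering (MSB = bit 0 of 64),
// RLDICL with MB >= 33 clears bits 0..32, e.g. `RLDICL $r, $r, 0, 33` keeps
// only the low 31 bits, so the result is non-negative and therefore both
// zero- and sign-extended from 32 bits.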

// This function checks the machine instruction that defines the input register
// Reg. If that machine instruction always outputs a value that has only zeros
// in the higher 32 bits then this function will return true.
static bool definedByZeroExtendingOp(const unsigned Reg,
                                     const MachineRegisterInfo *MRI) {
  if (!Register::isVirtualRegister(Reg))
    return false;

  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return false;

  int Opcode = MI->getOpcode();
  const PPCInstrInfo *TII =
      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
  if (TII->isZExt32To64(Opcode))
    return true;

  // The first def of LBZU/LHZU/LWZU is zero extended.
  if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
       Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
      MI->getOperand(0).getReg() == Reg)
    return true;

  // The 16-bit immediate is sign-extended in li/lis.
  // If the most significant bit is zero, all higher bits are zero.
  if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
      Opcode == PPC::LIS || Opcode == PPC::LIS8) {
    int64_t Imm = MI->getOperand(1).getImm();
    if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
      return true;
  }

  // We have some variations of rotate-and-mask instructions
  // that clear the higher 32 bits.
  if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
       Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
       Opcode == PPC::RLDICL_32_64) &&
      MI->getOperand(3).getImm() >= 32)
    return true;

  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
      MI->getOperand(3).getImm() >= 32 &&
      MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    return true;

  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return true;

  return false;
}
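// Illustrative note (added): LI sign-extends its 16-bit immediate, so
// `LI $r, 0x7FFF` yields zeros in all upper bits while `LI $r, -1` yields all
// ones; the `~0x7FFFuLL` test above accepts only the former kind.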

// This function returns true if the input MachineInstr is a TOC save
// instruction.
bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
  if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
    return false;
  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
  unsigned StackOffset = MI.getOperand(1).getImm();
  Register StackReg = MI.getOperand(2).getReg();
  Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  if (StackReg == SPReg && StackOffset == TOCSaveOffset)
    return true;

  return false;
}
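// Illustrative example (added): under the ELFv2 ABI the TOC save is typically
// `STD $x2, 24, $x1`, i.e. the TOC pointer stored at offset 24 from the stack
// pointer, which is exactly the offset/register pattern checked above.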

// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;
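// Note (added): with MAX_BINOP_DEPTH == 1, chains of PHIs/binary ops are
// followed at most one level deep before the analyses below conservatively
// give up.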

// This function will promote the instruction which defines the register `Reg`
// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
// used to check whether an instruction needs to be promoted or not is similar
// to the logic used to check whether or not a defined register is sign or zero
// extended within the function PPCInstrInfo::isSignOrZeroExtended.
// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
// than once. This is done to prevent exponential recursion.
void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
                                                  MachineRegisterInfo *MRI,
                                                  unsigned BinOpDepth,
                                                  LiveVariables *LV) const {
  if (!Reg.isVirtual())
    return;

  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return;

  unsigned Opcode = MI->getOpcode();

  switch (Opcode) {
  case PPC::OR:
  case PPC::ISEL:
  case PPC::OR8:
  case PPC::PHI: {
    if (BinOpDepth >= MAX_BINOP_DEPTH)
      break;

    unsigned OperandEnd = 3, OperandStride = 1;
    if (Opcode == PPC::PHI) {
      OperandEnd = MI->getNumOperands();
      OperandStride = 2;
    }

    for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
      assert(MI->getOperand(I).isReg() && "Operand must be register");
      promoteInstr32To64ForElimEXTSW(MI->getOperand(I).getReg(), MRI,
                                     BinOpDepth + 1, LV);
    }

    break;
  }
  case PPC::COPY: {
    // Refers to the logic of the `case PPC::COPY` statement in the function
    // PPCInstrInfo::isSignOrZeroExtended().

    Register SrcReg = MI->getOperand(1).getReg();
    // In both ELFv1 and v2 ABI, method parameters and the return value
    // are sign- or zero-extended.
    const MachineFunction *MF = MI->getMF();
    if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
      // If this is a copy from another register, we recursively promote the
      // source.
      promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
      break;
    }

    // From here on everything is SVR4ABI. COPY will be eliminated in the other
    // pass; we do not need to promote the COPY pseudo opcode.

    if (SrcReg != PPC::X3)
      // If this is a copy from another register, we recursively promote the
      // source.
      promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
    break;
  }
  case PPC::ORI:
  case PPC::XORI:
  case PPC::ORI8:
  case PPC::XORI8:
  case PPC::ORIS:
  case PPC::XORIS:
  case PPC::ORIS8:
  case PPC::XORIS8:
    promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI, BinOpDepth,
                                   LV);
    break;
  case PPC::AND:
  case PPC::AND8:
    if (BinOpDepth >= MAX_BINOP_DEPTH)
      break;

    promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI,
                                   BinOpDepth + 1, LV);
    promoteInstr32To64ForElimEXTSW(MI->getOperand(2).getReg(), MRI,
                                   BinOpDepth + 1, LV);
    break;
  }

  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
  if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
    return;

  const PPCInstrInfo *TII =
      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();

  // Map the 32-bit to 64-bit opcodes for instructions that are not sign- or
  // zero-extending themselves, but may have operands that are the destination
  // registers of sign- or zero-extending instructions.
  std::unordered_map<unsigned, unsigned> OpcodeMap = {
      {PPC::OR, PPC::OR8},     {PPC::ISEL, PPC::ISEL8},
      {PPC::ORI, PPC::ORI8},   {PPC::XORI, PPC::XORI8},
      {PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
      {PPC::AND, PPC::AND8}};
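  // Example (added, illustrative): a 32-bit `$d = OR $a, $b` whose inputs are
  // already sign-extended can be rewritten as the 64-bit OR8, making a later
  // EXTSW of $d redundant so the EXTSW-elimination pass can remove it.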

  int NewOpcode = -1;
  auto It = OpcodeMap.find(Opcode);
  if (It != OpcodeMap.end()) {
    // Set the new opcode to the mapped 64-bit version.
    NewOpcode = It->second;
  } else {
    if (!TII->isSExt32To64(Opcode))
      return;

    // The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
    // map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
    // instruction with the same opcode.
    NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
  }

  assert(NewOpcode != -1 &&
         "Must have a 64-bit opcode to map the 32-bit opcode!");

  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
  const MCInstrDesc &MCID = TII->get(NewOpcode);
  const TargetRegisterClass *NewRC =
      TRI->getRegClass(MCID.operands()[0].RegClass);

  Register SrcReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);

  // If the register class of the defined register in the 32-bit instruction
  // is the same as the register class of the defined register in the promoted
  // 64-bit instruction, we do not need to promote the instruction.
  if (NewRC == SrcRC)
    return;

  DebugLoc DL = MI->getDebugLoc();
  auto MBB = MI->getParent();

  // Since the pseudo-opcode of the instruction is promoted from 32-bit to
  // 64-bit, if the source reg class of the original instruction belongs to
  // PPC::GPRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
  // the operand to PPC::G8RCRegClass or PPC::G8RC_and_G8RC_NOX0RegClass,
  // respectively.
  DenseMap<unsigned, Register> PromoteRegs;
  for (unsigned i = 1; i < MI->getNumOperands(); i++) {
    MachineOperand &Operand = MI->getOperand(i);
    if (!Operand.isReg())
      continue;

    Register OperandReg = Operand.getReg();
    if (!OperandReg.isVirtual())
      continue;

    const TargetRegisterClass *NewUsedRegRC =
        TRI->getRegClass(MCID.operands()[i].RegClass);
    const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
    if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
                                  OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
      // Promote the used 32-bit register to a 64-bit register.
      Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
      Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
      BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
      BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
          .addReg(TmpReg)
          .addReg(OperandReg)
          .addImm(PPC::sub_32);
      PromoteRegs[i] = DstTmpReg;
    }
  }

  Register NewDefinedReg = MRI->createVirtualRegister(NewRC);

  BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
  MachineBasicBlock::instr_iterator Iter(MI);
  --Iter;
  MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
  for (unsigned i = 1; i < MI->getNumOperands(); i++) {
    if (PromoteRegs.find(i) != PromoteRegs.end())
      MIBuilder.addReg(PromoteRegs[i], RegState::Kill);
    else
      Iter->addOperand(MI->getOperand(i));
  }

  for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
    MachineOperand &Operand = Iter->getOperand(i);
    if (!Operand.isReg())
      continue;
    Register OperandReg = Operand.getReg();
    if (!OperandReg.isVirtual())
      continue;
    LV->recomputeForSingleDefVirtReg(OperandReg);
  }

  MI->eraseFromParent();

  // A defined register may be used by other instructions that are 32-bit.
  // After the defined register is promoted to 64-bit for the promoted
  // instruction, we need to demote the 64-bit defined register back to a
  // 32-bit register.
  BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
      .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
  LV->recomputeForSingleDefVirtReg(NewDefinedReg);
}

// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is
// incremented only when isSignOrZeroExtended calls itself more than once.
// This is done to prevent exponential recursion. There is no parameter to
// track linear recursion.
std::pair<bool, bool>
PPCInstrInfo::isSignOrZeroExtended(const unsigned Reg,
                                   const unsigned BinOpDepth,
                                   const MachineRegisterInfo *MRI) const {
  if (!Register::isVirtualRegister(Reg))
    return std::pair<bool, bool>(false, false);

  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return std::pair<bool, bool>(false, false);

  bool IsSExt = definedBySignExtendingOp(Reg, MRI);
  bool IsZExt = definedByZeroExtendingOp(Reg, MRI);

  // If we know the instruction always returns a sign- and zero-extended
  // result, no need for further analysis.
  if (IsSExt && IsZExt)
    return std::pair<bool, bool>(IsSExt, IsZExt);

  switch (MI->getOpcode()) {
  case PPC::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();

    // In both ELFv1 and v2 ABI, method parameters and the return value
    // are sign- or zero-extended.
    const MachineFunction *MF = MI->getMF();

    if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
      // If this is a copy from another register, we recursively check source.
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
    }

    // From here on everything is SVR4ABI.
    const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
    // We check the ZExt/SExt flags for a method parameter.
    if (MI->getParent()->getBasicBlock() ==
        &MF->getFunction().getEntryBlock()) {
      Register VReg = MI->getOperand(0).getReg();
      if (MF->getRegInfo().isLiveIn(VReg)) {
        IsSExt |= FuncInfo->isLiveInSExt(VReg);
        IsZExt |= FuncInfo->isLiveInZExt(VReg);
        return std::pair<bool, bool>(IsSExt, IsZExt);
      }
    }

    if (SrcReg != PPC::X3) {
      // If this is a copy from another register, we recursively check source.
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
    }

    // For a method return value, we check the ZExt/SExt flags in the
    // attribute. We assume the following code sequence for a method call:
    //   ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
    //   BL8_NOP @func,...
    //   ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
    //   %5 = COPY %x3; G8RC:%5
    const MachineBasicBlock *MBB = MI->getParent();
    std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
    MachineBasicBlock::const_instr_iterator II =
        MachineBasicBlock::const_instr_iterator(MI);
    if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
      return IsExtendPair;

    const MachineInstr &CallMI = *(--II);
    if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
      return IsExtendPair;

    const Function *CalleeFn =
        dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
    if (!CalleeFn)
      return IsExtendPair;
    const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
    if (IntTy && IntTy->getBitWidth() <= 32) {
      const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
      IsSExt |= Attrs.hasAttribute(Attribute::SExt);
      IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
      return std::pair<bool, bool>(IsSExt, IsZExt);
    }

    return IsExtendPair;
  }

  // OR, XOR with 16-bit immediate does not change the upper 48 bits.
  // So, we track the operand register as we do for register copy.
  case PPC::ORI:
  case PPC::XORI:
  case PPC::ORI8:
  case PPC::XORI8: {
    Register SrcReg = MI->getOperand(1).getReg();
    auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
    return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                 SrcExt.second || IsZExt);
  }

  // OR, XOR with shifted 16-bit immediate does not change the upper
  // 32 bits. So, we track the operand register for zero extension.
  // For sign extension when the MSB of the immediate is zero, we also
  // track the operand register since the upper 33 bits are unchanged.
  case PPC::ORIS:
  case PPC::XORIS:
  case PPC::ORIS8:
  case PPC::XORIS8: {
    Register SrcReg = MI->getOperand(1).getReg();
    auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
    uint16_t Imm = MI->getOperand(2).getImm();
    if (Imm & 0x8000)
      return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
    else
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
  }
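  // Illustrative note (added): ORIS ors in (Imm << 16), so an immediate with
  // a clear MSB touches only bits 16..30 from the LSB, leaving bit 31 (the
  // 32-bit sign bit) and everything above it unchanged.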

  // If all incoming values are sign-/zero-extended,
  // the output of OR, ISEL or PHI is also sign-/zero-extended.
  case PPC::OR:
  case PPC::OR8:
  case PPC::ISEL:
  case PPC::PHI: {
    if (BinOpDepth >= MAX_BINOP_DEPTH)
      return std::pair<bool, bool>(false, false);

    // The input registers for PHI are operand 1, 3, ...
    // The input registers for others are operand 1 and 2.
    unsigned OperandEnd = 3, OperandStride = 1;
    if (MI->getOpcode() == PPC::PHI) {
      OperandEnd = MI->getNumOperands();
      OperandStride = 2;
    }

    IsSExt = true;
    IsZExt = true;
    for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
      if (!MI->getOperand(I).isReg())
        return std::pair<bool, bool>(false, false);

      Register SrcReg = MI->getOperand(I).getReg();
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
      IsSExt &= SrcExt.first;
      IsZExt &= SrcExt.second;
    }
    return std::pair<bool, bool>(IsSExt, IsZExt);
  }
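  // Illustrative note (added): a PHI's operands are (value, predecessor)
  // pairs, so with OperandStride == 2 the loop above visits the value
  // operands 1, 3, 5, ...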

  // If at least one of the incoming values of an AND is zero extended
  // then the output is also zero-extended. If both of the incoming values
  // are sign-extended then the output is also sign extended.
  case PPC::AND:
  case PPC::AND8: {
    if (BinOpDepth >= MAX_BINOP_DEPTH)
      return std::pair<bool, bool>(false, false);

    Register SrcReg1 = MI->getOperand(1).getReg();
    Register SrcReg2 = MI->getOperand(2).getReg();
    auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
    auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
    return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
                                 Src1Ext.second || Src2Ext.second);
  }
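  // Note (added): AND can only clear bits, so zeros in the upper 32 bits of
  // either input guarantee zeros in the result (hence `||`), while sign
  // extension is only guaranteed when both inputs are sign-extended (hence
  // `&&`).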
  }

  return std::pair<bool, bool>(IsSExt, IsZExt);
}

bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
  return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
}

namespace {
class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  MachineInstr *Loop, *EndLoop, *LoopCount;
  MachineFunction *MF;
  const TargetInstrInfo *TII;
  int64_t TripCount;

public:
  PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
                       MachineInstr *LoopCount)
      : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
        MF(Loop->getParent()->getParent()),
        TII(MF->getSubtarget().getInstrInfo()) {
    // Inspect the Loop instruction up-front, as it may be deleted when we call
    // createTripCountGreaterCondition.
    if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
      TripCount = LoopCount->getOperand(1).getImm();
    else
      TripCount = -1;
  }

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Only ignore the terminator.
    return MI == EndLoop;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &Cond) override {
    if (TripCount == -1) {
      // Since the BDZ/BDZ8 that we will insert will also decrement the CTR
      // by 1, we don't need to generate anything here.
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(
          MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
          /*isDef=*/true));
      return {};
    }

    return TripCount > TC;
  }

  void setPreheader(MachineBasicBlock *NewPreheader) override {
    // Do nothing. We want the LOOP setup instruction to stay in the *old*
    // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
  }

  void adjustTripCount(int TripCountAdjust) override {
    // If the loop trip count is a compile-time value, then just change the
    // value.
    if (LoopCount->getOpcode() == PPC::LI8 ||
        LoopCount->getOpcode() == PPC::LI) {
      int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
      LoopCount->getOperand(1).setImm(TripCount);
      return;
    }

    // Since the BDZ/BDZ8 that we will insert will also decrement the CTR by 1,
    // we don't need to generate anything here.
  }

  void disposed() override {
    Loop->eraseFromParent();
    // Ensure the loop setup instruction is deleted too.
    LoopCount->eraseFromParent();
  }
};
} // namespace
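// Note (added): PowerPC hardware loops decrement CTR on each BDNZ, so when
// the trip count is not a compile-time constant the guard condition above is
// expressed in terms of CTR/CTR8 rather than an explicit compare.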

std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  // We really "analyze" only hardware loops right now.
  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
  if (Preheader == LoopBB)
    Preheader = *std::next(LoopBB->pred_begin());
  MachineFunction *MF = Preheader->getParent();

  if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
    SmallPtrSet<MachineBasicBlock *, 8> Visited;
    if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
      Register LoopCountReg = LoopInst->getOperand(0).getReg();
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
      return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
    }
  }
  return nullptr;
}
*PPCInstrInfo::findLoopInstr(
5727 MachineBasicBlock
&PreHeader
,
5728 SmallPtrSet
<MachineBasicBlock
*, 8> &Visited
) const {
5730 unsigned LOOPi
= (Subtarget
.isPPC64() ? PPC::MTCTR8loop
: PPC::MTCTRloop
);
5732 // The loop set-up instruction should be in preheader
5733 for (auto &I
: PreHeader
.instrs())
5734 if (I
.getOpcode() == LOOPi
)

// Return true if we get the base operand, byte offset of an instruction and
// the memory width. Width is the size of memory that is being loaded/stored.
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
    LocationSize &Width, const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
    return false;

  // Handle only loads/stores with base register followed by immediate offset.
  if (!LdSt.getOperand(1).isImm() ||
      (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
    return false;
  if (!LdSt.hasOneMemOperand())
    return false;

  Width = (*LdSt.memoperands_begin())->getSize();
  Offset = LdSt.getOperand(1).getImm();
  BaseReg = &LdSt.getOperand(2);
  return true;
}
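// Illustrative example (added): for `LWZ $r3, 8, $r4` the base operand is
// $r4, the offset is 8 and the width is 4 bytes.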

bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If the
  // base registers are identical, and the offset of the lower memory access
  // plus its width does not reach the offset of the higher memory access,
  // then the memory accesses are disjoint.
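  // Worked example (added): two 4-byte accesses off the same base at offsets
  // 8 and 12 are disjoint since 8 + 4 <= 12; at offsets 8 and 10 they would
  // overlap since 8 + 4 > 10.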
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  LocationSize WidthA = 0, WidthB = 0;
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
      int LowOffset = std::min(OffsetA, OffsetB);
      int HighOffset = std::max(OffsetA, OffsetB);
      LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowWidth.hasValue() &&
          LowOffset + (int)LowWidth.getValue() <= HighOffset)