1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
13 //===----------------------------------------------------------------------===//
15 #include "MCTargetDesc/PPCMCTargetDesc.h"
16 #include "MCTargetDesc/PPCPredicates.h"
18 #include "PPCISelLowering.h"
19 #include "PPCMachineFunctionInfo.h"
20 #include "PPCSubtarget.h"
21 #include "PPCTargetMachine.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/BranchProbabilityInfo.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/SelectionDAG.h"
36 #include "llvm/CodeGen/SelectionDAGISel.h"
37 #include "llvm/CodeGen/SelectionDAGNodes.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
40 #include "llvm/CodeGen/ValueTypes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/GlobalValue.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorHandling.h"
54 #include "llvm/Support/KnownBits.h"
55 #include "llvm/Support/MachineValueType.h"
56 #include "llvm/Support/MathExtras.h"
57 #include "llvm/Support/raw_ostream.h"
70 #define DEBUG_TYPE "ppc-codegen"
72 STATISTIC(NumSextSetcc
,
73 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
74 STATISTIC(NumZextSetcc
,
75 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
76 STATISTIC(SignExtensionsAdded
,
77 "Number of sign extensions for compare inputs added.");
78 STATISTIC(ZeroExtensionsAdded
,
79 "Number of zero extensions for compare inputs added.");
80 STATISTIC(NumLogicOpsOnComparison
,
81 "Number of logical ops on i1 values calculated in GPR.");
82 STATISTIC(OmittedForNonExtendUses
,
83 "Number of compares not eliminated as they have non-extending uses.");
85 // FIXME: Remove this once the bug has been fixed!
86 cl::opt
<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
87 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden
);
90 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
91 cl::desc("use aggressive ppc isel for bit permutations"),
93 static cl::opt
<bool> BPermRewriterNoMasking(
94 "ppc-bit-perm-rewriter-stress-rotates",
95 cl::desc("stress rotate selection in aggressive ppc isel for "
99 static cl::opt
<bool> EnableBranchHint(
100 "ppc-use-branch-hint", cl::init(true),
101 cl::desc("Enable static hinting of branches on ppc"),
104 static cl::opt
<bool> EnableTLSOpt(
105 "ppc-tls-opt", cl::init(true),
106 cl::desc("Enable tls optimization peephole"),
// Selects which integer comparisons the GPR-only expansion applies to;
// the enumerators correspond one-to-one to the -ppc-gpr-icmps values below.
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
                     ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
                     ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
113 static cl::opt
<ICmpInGPRType
> CmpInGPR(
114 "ppc-gpr-icmps", cl::Hidden
, cl::init(ICGPR_All
),
115 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
116 cl::values(clEnumValN(ICGPR_None
, "none", "Do not modify integer comparisons."),
117 clEnumValN(ICGPR_All
, "all", "All possible int comparisons in GPRs."),
118 clEnumValN(ICGPR_I32
, "i32", "Only i32 comparisons in GPRs."),
119 clEnumValN(ICGPR_I64
, "i64", "Only i64 comparisons in GPRs."),
120 clEnumValN(ICGPR_NonExtIn
, "nonextin",
121 "Only comparisons where inputs don't need [sz]ext."),
122 clEnumValN(ICGPR_Zext
, "zext", "Only comparisons with zext result."),
123 clEnumValN(ICGPR_ZextI32
, "zexti32",
124 "Only i32 comparisons with zext result."),
125 clEnumValN(ICGPR_ZextI64
, "zexti64",
126 "Only i64 comparisons with zext result."),
127 clEnumValN(ICGPR_Sext
, "sext", "Only comparisons with sext result."),
128 clEnumValN(ICGPR_SextI32
, "sexti32",
129 "Only i32 comparisons with sext result."),
130 clEnumValN(ICGPR_SextI64
, "sexti64",
131 "Only i64 comparisons with sext result.")));
134 //===--------------------------------------------------------------------===//
135 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
136 /// instructions for SelectionDAG operations.
138 class PPCDAGToDAGISel
: public SelectionDAGISel
{
139 const PPCTargetMachine
&TM
;
140 const PPCSubtarget
*PPCSubTarget
;
141 const PPCTargetLowering
*PPCLowering
;
142 unsigned GlobalBaseReg
;
145 explicit PPCDAGToDAGISel(PPCTargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
146 : SelectionDAGISel(tm
, OptLevel
), TM(tm
) {}
148 bool runOnMachineFunction(MachineFunction
&MF
) override
{
149 // Make sure we re-emit a set of the global base reg if necessary
151 PPCSubTarget
= &MF
.getSubtarget
<PPCSubtarget
>();
152 PPCLowering
= PPCSubTarget
->getTargetLowering();
153 SelectionDAGISel::runOnMachineFunction(MF
);
155 if (!PPCSubTarget
->isSVR4ABI())
156 InsertVRSaveCode(MF
);
161 void PreprocessISelDAG() override
;
162 void PostprocessISelDAG() override
;
164 /// getI16Imm - Return a target constant with the specified value, of type
166 inline SDValue
getI16Imm(unsigned Imm
, const SDLoc
&dl
) {
167 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i16
);
170 /// getI32Imm - Return a target constant with the specified value, of type
172 inline SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
173 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
176 /// getI64Imm - Return a target constant with the specified value, of type
178 inline SDValue
getI64Imm(uint64_t Imm
, const SDLoc
&dl
) {
179 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i64
);
182 /// getSmallIPtrImm - Return a target constant of pointer type.
183 inline SDValue
getSmallIPtrImm(unsigned Imm
, const SDLoc
&dl
) {
184 return CurDAG
->getTargetConstant(
185 Imm
, dl
, PPCLowering
->getPointerTy(CurDAG
->getDataLayout()));
188 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
189 /// rotate and mask opcode and mask operation.
190 static bool isRotateAndMask(SDNode
*N
, unsigned Mask
, bool isShiftMask
,
191 unsigned &SH
, unsigned &MB
, unsigned &ME
);
193 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
194 /// base register. Return the virtual register that holds this value.
195 SDNode
*getGlobalBaseReg();
197 void selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
= 0);
199 // Select - Convert the specified operand from a target-independent to a
200 // target-specific node if it hasn't already been changed.
201 void Select(SDNode
*N
) override
;
203 bool tryBitfieldInsert(SDNode
*N
);
204 bool tryBitPermutation(SDNode
*N
);
205 bool tryIntCompareInGPR(SDNode
*N
);
207 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
208 // an X-Form load instruction with the offset being a relocation coming from
209 // the PPCISD::ADD_TLS.
210 bool tryTLSXFormLoad(LoadSDNode
*N
);
211 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
212 // an X-Form store instruction with the offset being a relocation coming from
213 // the PPCISD::ADD_TLS.
214 bool tryTLSXFormStore(StoreSDNode
*N
);
215 /// SelectCC - Select a comparison of the specified values with the
216 /// specified condition code, returning the CR# of the expression.
217 SDValue
SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
220 /// SelectAddrImm - Returns true if the address N can be represented by
221 /// a base register plus a signed 16-bit displacement [r+imm].
222 bool SelectAddrImm(SDValue N
, SDValue
&Disp
,
224 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 0);
227 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
228 /// immediate field. Note that the operand at this point is already the
229 /// result of a prior SelectAddressRegImm call.
230 bool SelectAddrImmOffs(SDValue N
, SDValue
&Out
) const {
231 if (N
.getOpcode() == ISD::TargetConstant
||
232 N
.getOpcode() == ISD::TargetGlobalAddress
) {
240 /// SelectAddrIdx - Given the specified addressed, check to see if it can be
241 /// represented as an indexed [r+r] operation. Returns false if it can
242 /// be represented by [r+imm], which are preferred.
243 bool SelectAddrIdx(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
244 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
);
247 /// SelectAddrIdxOnly - Given the specified addressed, force it to be
248 /// represented as an indexed [r+r] operation.
249 bool SelectAddrIdxOnly(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
250 return PPCLowering
->SelectAddressRegRegOnly(N
, Base
, Index
, *CurDAG
);
253 /// SelectAddrImmX4 - Returns true if the address N can be represented by
254 /// a base register plus a signed 16-bit displacement that is a multiple of 4.
255 /// Suitable for use by STD and friends.
256 bool SelectAddrImmX4(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
257 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 4);
260 bool SelectAddrImmX16(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
261 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 16);
264 // Select an address into a single register.
265 bool SelectAddr(SDValue N
, SDValue
&Base
) {
270 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
271 /// inline asm expressions. It is always correct to compute the value into
272 /// a register. The case of adding a (possibly relocatable) constant to a
273 /// register can be improved, but it is wrong to substitute Reg+Reg for
274 /// Reg in an asm, because the load or store opcode would have to change.
275 bool SelectInlineAsmMemoryOperand(const SDValue
&Op
,
276 unsigned ConstraintID
,
277 std::vector
<SDValue
> &OutOps
) override
{
278 switch(ConstraintID
) {
280 errs() << "ConstraintID: " << ConstraintID
<< "\n";
281 llvm_unreachable("Unexpected asm memory constraint");
282 case InlineAsm::Constraint_es
:
283 case InlineAsm::Constraint_i
:
284 case InlineAsm::Constraint_m
:
285 case InlineAsm::Constraint_o
:
286 case InlineAsm::Constraint_Q
:
287 case InlineAsm::Constraint_Z
:
288 case InlineAsm::Constraint_Zy
:
289 // We need to make sure that this one operand does not end up in r0
290 // (because we might end up lowering this as 0(%op)).
291 const TargetRegisterInfo
*TRI
= PPCSubTarget
->getRegisterInfo();
292 const TargetRegisterClass
*TRC
= TRI
->getPointerRegClass(*MF
, /*Kind=*/1);
294 SDValue RC
= CurDAG
->getTargetConstant(TRC
->getID(), dl
, MVT::i32
);
296 SDValue(CurDAG
->getMachineNode(TargetOpcode::COPY_TO_REGCLASS
,
297 dl
, Op
.getValueType(),
300 OutOps
.push_back(NewOp
);
306 void InsertVRSaveCode(MachineFunction
&MF
);
308 StringRef
getPassName() const override
{
309 return "PowerPC DAG->DAG Pattern Instruction Selection";
312 // Include the pieces autogenerated from the target description.
313 #include "PPCGenDAGISel.inc"
316 bool trySETCC(SDNode
*N
);
318 void PeepholePPC64();
319 void PeepholePPC64ZExt();
320 void PeepholeCROps();
322 SDValue
combineToCMPB(SDNode
*N
);
323 void foldBoolExts(SDValue
&Res
, SDNode
*&N
);
325 bool AllUsersSelectZero(SDNode
*N
);
326 void SwapAllSelectUsers(SDNode
*N
);
328 bool isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const;
329 void transferMemOperands(SDNode
*N
, SDNode
*Result
);
330 MachineSDNode
*flipSignBit(const SDValue
&N
, SDNode
**SignBit
= nullptr);
333 } // end anonymous namespace
335 /// InsertVRSaveCode - Once the entire function has been instruction selected,
336 /// all virtual registers are created and all machine instructions are built,
337 /// check to see if we need to save/restore VRSAVE. If so, do it.
338 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction
&Fn
) {
339 // Check to see if this function uses vector registers, which means we have to
340 // save and restore the VRSAVE register and update it with the regs we use.
342 // In this case, there will be virtual registers of vector type created
343 // by the scheduler. Detect them now.
344 bool HasVectorVReg
= false;
345 for (unsigned i
= 0, e
= RegInfo
->getNumVirtRegs(); i
!= e
; ++i
) {
346 unsigned Reg
= TargetRegisterInfo::index2VirtReg(i
);
347 if (RegInfo
->getRegClass(Reg
) == &PPC::VRRCRegClass
) {
348 HasVectorVReg
= true;
352 if (!HasVectorVReg
) return; // nothing to do.
354 // If we have a vector register, we want to emit code into the entry and exit
355 // blocks to save and restore the VRSAVE register. We do this here (instead
356 // of marking all vector instructions as clobbering VRSAVE) for two reasons:
358 // 1. This (trivially) reduces the load on the register allocator, by not
359 // having to represent the live range of the VRSAVE register.
360 // 2. This (more significantly) allows us to create a temporary virtual
361 // register to hold the saved VRSAVE value, allowing this temporary to be
362 // register allocated, instead of forcing it to be spilled to the stack.
364 // Create two vregs - one to hold the VRSAVE register that is live-in to the
365 // function and one for the value after having bits or'd into it.
366 unsigned InVRSAVE
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
367 unsigned UpdatedVRSAVE
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
369 const TargetInstrInfo
&TII
= *PPCSubTarget
->getInstrInfo();
370 MachineBasicBlock
&EntryBB
= *Fn
.begin();
372 // Emit the following code into the entry block:
373 // InVRSAVE = MFVRSAVE
374 // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
375 // MTVRSAVE UpdatedVRSAVE
376 MachineBasicBlock::iterator IP
= EntryBB
.begin(); // Insert Point
377 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::MFVRSAVE
), InVRSAVE
);
378 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::UPDATE_VRSAVE
),
379 UpdatedVRSAVE
).addReg(InVRSAVE
);
380 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::MTVRSAVE
)).addReg(UpdatedVRSAVE
);
382 // Find all return blocks, outputting a restore in each epilog.
383 for (MachineFunction::iterator BB
= Fn
.begin(), E
= Fn
.end(); BB
!= E
; ++BB
) {
384 if (BB
->isReturnBlock()) {
385 IP
= BB
->end(); --IP
;
387 // Skip over all terminator instructions, which are part of the return
389 MachineBasicBlock::iterator I2
= IP
;
390 while (I2
!= BB
->begin() && (--I2
)->isTerminator())
393 // Emit: MTVRSAVE InVRSave
394 BuildMI(*BB
, IP
, dl
, TII
.get(PPC::MTVRSAVE
)).addReg(InVRSAVE
);
399 /// getGlobalBaseReg - Output the instructions required to put the
400 /// base address to use for accessing globals into a register.
402 SDNode
*PPCDAGToDAGISel::getGlobalBaseReg() {
403 if (!GlobalBaseReg
) {
404 const TargetInstrInfo
&TII
= *PPCSubTarget
->getInstrInfo();
405 // Insert the set of GlobalBaseReg into the first MBB of the function
406 MachineBasicBlock
&FirstMBB
= MF
->front();
407 MachineBasicBlock::iterator MBBI
= FirstMBB
.begin();
408 const Module
*M
= MF
->getFunction().getParent();
411 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) == MVT::i32
) {
412 if (PPCSubTarget
->isTargetELF()) {
413 GlobalBaseReg
= PPC::R30
;
414 if (M
->getPICLevel() == PICLevel::SmallPIC
) {
415 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MoveGOTtoLR
));
416 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
417 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
419 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
420 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
421 unsigned TempReg
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
422 BuildMI(FirstMBB
, MBBI
, dl
,
423 TII
.get(PPC::UpdateGBR
), GlobalBaseReg
)
424 .addReg(TempReg
, RegState::Define
).addReg(GlobalBaseReg
);
425 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
429 RegInfo
->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass
);
430 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
431 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
434 // We must ensure that this sequence is dominated by the prologue.
435 // FIXME: This is a bit of a big hammer since we don't get the benefits
436 // of shrink-wrapping whenever we emit this instruction. Considering
437 // this is used in any function where we emit a jump table, this may be
438 // a significant limitation. We should consider inserting this in the
439 // block where it is used and then commoning this sequence up if it
440 // appears in multiple places.
441 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
443 MF
->getInfo
<PPCFunctionInfo
>()->setShrinkWrapDisabled(true);
444 GlobalBaseReg
= RegInfo
->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass
);
445 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR8
));
446 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR8
), GlobalBaseReg
);
449 return CurDAG
->getRegister(GlobalBaseReg
,
450 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()))
454 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
455 /// operand. If so Imm will receive the 32-bit value.
456 static bool isInt32Immediate(SDNode
*N
, unsigned &Imm
) {
457 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i32
) {
458 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
464 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
465 /// operand. If so Imm will receive the 64-bit value.
466 static bool isInt64Immediate(SDNode
*N
, uint64_t &Imm
) {
467 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i64
) {
468 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
474 // isInt32Immediate - This method tests to see if a constant operand.
475 // If so Imm will receive the 32 bit value.
476 static bool isInt32Immediate(SDValue N
, unsigned &Imm
) {
477 return isInt32Immediate(N
.getNode(), Imm
);
480 /// isInt64Immediate - This method tests to see if the value is a 64-bit
481 /// constant operand. If so Imm will receive the 64-bit value.
482 static bool isInt64Immediate(SDValue N
, uint64_t &Imm
) {
483 return isInt64Immediate(N
.getNode(), Imm
);
486 static unsigned getBranchHint(unsigned PCC
, FunctionLoweringInfo
*FuncInfo
,
487 const SDValue
&DestMBB
) {
488 assert(isa
<BasicBlockSDNode
>(DestMBB
));
490 if (!FuncInfo
->BPI
) return PPC::BR_NO_HINT
;
492 const BasicBlock
*BB
= FuncInfo
->MBB
->getBasicBlock();
493 const TerminatorInst
*BBTerm
= BB
->getTerminator();
495 if (BBTerm
->getNumSuccessors() != 2) return PPC::BR_NO_HINT
;
497 const BasicBlock
*TBB
= BBTerm
->getSuccessor(0);
498 const BasicBlock
*FBB
= BBTerm
->getSuccessor(1);
500 auto TProb
= FuncInfo
->BPI
->getEdgeProbability(BB
, TBB
);
501 auto FProb
= FuncInfo
->BPI
->getEdgeProbability(BB
, FBB
);
503 // We only want to handle cases which are easy to predict at static time, e.g.
504 // C++ throw statement, that is very likely not taken, or calling never
505 // returned function, e.g. stdlib exit(). So we set Threshold to filter
508 // Below is LLVM branch weight table, we only want to handle case 1, 2
510 // Case Taken:Nontaken Example
511 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
512 // 2. Invoke-terminating 1:1048575
513 // 3. Coldblock 4:64 __builtin_expect
514 // 4. Loop Branch 124:4 For loop
515 // 5. PH/ZH/FPH 20:12
516 const uint32_t Threshold
= 10000;
518 if (std::max(TProb
, FProb
) / Threshold
< std::min(TProb
, FProb
))
519 return PPC::BR_NO_HINT
;
521 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo
->Fn
->getName()
522 << "::" << BB
->getName() << "'\n"
523 << " -> " << TBB
->getName() << ": " << TProb
<< "\n"
524 << " -> " << FBB
->getName() << ": " << FProb
<< "\n");
526 const BasicBlockSDNode
*BBDN
= cast
<BasicBlockSDNode
>(DestMBB
);
528 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
529 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
530 if (BBDN
->getBasicBlock()->getBasicBlock() != TBB
)
531 std::swap(TProb
, FProb
);
533 return (TProb
> FProb
) ? PPC::BR_TAKEN_HINT
: PPC::BR_NONTAKEN_HINT
;
536 // isOpcWithIntImmediate - This method tests to see if the node is a specific
537 // opcode and that it has a immediate integer right operand.
538 // If so Imm will receive the 32 bit value.
539 static bool isOpcWithIntImmediate(SDNode
*N
, unsigned Opc
, unsigned& Imm
) {
540 return N
->getOpcode() == Opc
541 && isInt32Immediate(N
->getOperand(1).getNode(), Imm
);
544 void PPCDAGToDAGISel::selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
) {
546 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
547 SDValue TFI
= CurDAG
->getTargetFrameIndex(FI
, N
->getValueType(0));
548 unsigned Opc
= N
->getValueType(0) == MVT::i32
? PPC::ADDI
: PPC::ADDI8
;
550 CurDAG
->SelectNodeTo(SN
, Opc
, N
->getValueType(0), TFI
,
551 getSmallIPtrImm(Offset
, dl
));
553 ReplaceNode(SN
, CurDAG
->getMachineNode(Opc
, dl
, N
->getValueType(0), TFI
,
554 getSmallIPtrImm(Offset
, dl
)));
557 bool PPCDAGToDAGISel::isRotateAndMask(SDNode
*N
, unsigned Mask
,
558 bool isShiftMask
, unsigned &SH
,
559 unsigned &MB
, unsigned &ME
) {
560 // Don't even go down this path for i64, since different logic will be
561 // necessary for rldicl/rldicr/rldimi.
562 if (N
->getValueType(0) != MVT::i32
)
566 unsigned Indeterminant
= ~0; // bit mask marking indeterminant results
567 unsigned Opcode
= N
->getOpcode();
568 if (N
->getNumOperands() != 2 ||
569 !isInt32Immediate(N
->getOperand(1).getNode(), Shift
) || (Shift
> 31))
572 if (Opcode
== ISD::SHL
) {
573 // apply shift left to mask if it comes first
574 if (isShiftMask
) Mask
= Mask
<< Shift
;
575 // determine which bits are made indeterminant by shift
576 Indeterminant
= ~(0xFFFFFFFFu
<< Shift
);
577 } else if (Opcode
== ISD::SRL
) {
578 // apply shift right to mask if it comes first
579 if (isShiftMask
) Mask
= Mask
>> Shift
;
580 // determine which bits are made indeterminant by shift
581 Indeterminant
= ~(0xFFFFFFFFu
>> Shift
);
582 // adjust for the left rotate
584 } else if (Opcode
== ISD::ROTL
) {
590 // if the mask doesn't intersect any Indeterminant bits
591 if (Mask
&& !(Mask
& Indeterminant
)) {
593 // make sure the mask is still a mask (wrap arounds may not be)
594 return isRunOfOnes(Mask
, MB
, ME
);
599 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode
*ST
) {
600 SDValue Base
= ST
->getBasePtr();
601 if (Base
.getOpcode() != PPCISD::ADD_TLS
)
603 SDValue Offset
= ST
->getOffset();
604 if (!Offset
.isUndef())
608 EVT MemVT
= ST
->getMemoryVT();
609 EVT RegVT
= ST
->getValue().getValueType();
612 switch (MemVT
.getSimpleVT().SimpleTy
) {
616 Opcode
= (RegVT
== MVT::i32
) ? PPC::STBXTLS_32
: PPC::STBXTLS
;
620 Opcode
= (RegVT
== MVT::i32
) ? PPC::STHXTLS_32
: PPC::STHXTLS
;
624 Opcode
= (RegVT
== MVT::i32
) ? PPC::STWXTLS_32
: PPC::STWXTLS
;
628 Opcode
= PPC::STDXTLS
;
632 SDValue Chain
= ST
->getChain();
633 SDVTList VTs
= ST
->getVTList();
634 SDValue Ops
[] = {ST
->getValue(), Base
.getOperand(0), Base
.getOperand(1),
636 SDNode
*MN
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
637 transferMemOperands(ST
, MN
);
642 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode
*LD
) {
643 SDValue Base
= LD
->getBasePtr();
644 if (Base
.getOpcode() != PPCISD::ADD_TLS
)
646 SDValue Offset
= LD
->getOffset();
647 if (!Offset
.isUndef())
651 EVT MemVT
= LD
->getMemoryVT();
652 EVT RegVT
= LD
->getValueType(0);
654 switch (MemVT
.getSimpleVT().SimpleTy
) {
658 Opcode
= (RegVT
== MVT::i32
) ? PPC::LBZXTLS_32
: PPC::LBZXTLS
;
662 Opcode
= (RegVT
== MVT::i32
) ? PPC::LHZXTLS_32
: PPC::LHZXTLS
;
666 Opcode
= (RegVT
== MVT::i32
) ? PPC::LWZXTLS_32
: PPC::LWZXTLS
;
670 Opcode
= PPC::LDXTLS
;
674 SDValue Chain
= LD
->getChain();
675 SDVTList VTs
= LD
->getVTList();
676 SDValue Ops
[] = {Base
.getOperand(0), Base
.getOperand(1), Chain
};
677 SDNode
*MN
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
678 transferMemOperands(LD
, MN
);
683 /// Turn an or of two masked values into the rotate left word immediate then
684 /// mask insert (rlwimi) instruction.
685 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode
*N
) {
686 SDValue Op0
= N
->getOperand(0);
687 SDValue Op1
= N
->getOperand(1);
690 KnownBits LKnown
, RKnown
;
691 CurDAG
->computeKnownBits(Op0
, LKnown
);
692 CurDAG
->computeKnownBits(Op1
, RKnown
);
694 unsigned TargetMask
= LKnown
.Zero
.getZExtValue();
695 unsigned InsertMask
= RKnown
.Zero
.getZExtValue();
697 if ((TargetMask
| InsertMask
) == 0xFFFFFFFF) {
698 unsigned Op0Opc
= Op0
.getOpcode();
699 unsigned Op1Opc
= Op1
.getOpcode();
700 unsigned Value
, SH
= 0;
701 TargetMask
= ~TargetMask
;
702 InsertMask
= ~InsertMask
;
704 // If the LHS has a foldable shift and the RHS does not, then swap it to the
705 // RHS so that we can fold the shift into the insert.
706 if (Op0Opc
== ISD::AND
&& Op1Opc
== ISD::AND
) {
707 if (Op0
.getOperand(0).getOpcode() == ISD::SHL
||
708 Op0
.getOperand(0).getOpcode() == ISD::SRL
) {
709 if (Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
710 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
712 std::swap(Op0Opc
, Op1Opc
);
713 std::swap(TargetMask
, InsertMask
);
716 } else if (Op0Opc
== ISD::SHL
|| Op0Opc
== ISD::SRL
) {
717 if (Op1Opc
== ISD::AND
&& Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
718 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
720 std::swap(Op0Opc
, Op1Opc
);
721 std::swap(TargetMask
, InsertMask
);
726 if (isRunOfOnes(InsertMask
, MB
, ME
)) {
727 if ((Op1Opc
== ISD::SHL
|| Op1Opc
== ISD::SRL
) &&
728 isInt32Immediate(Op1
.getOperand(1), Value
)) {
729 Op1
= Op1
.getOperand(0);
730 SH
= (Op1Opc
== ISD::SHL
) ? Value
: 32 - Value
;
732 if (Op1Opc
== ISD::AND
) {
733 // The AND mask might not be a constant, and we need to make sure that
734 // if we're going to fold the masking with the insert, all bits not
735 // know to be zero in the mask are known to be one.
737 CurDAG
->computeKnownBits(Op1
.getOperand(1), MKnown
);
738 bool CanFoldMask
= InsertMask
== MKnown
.One
.getZExtValue();
740 unsigned SHOpc
= Op1
.getOperand(0).getOpcode();
741 if ((SHOpc
== ISD::SHL
|| SHOpc
== ISD::SRL
) && CanFoldMask
&&
742 isInt32Immediate(Op1
.getOperand(0).getOperand(1), Value
)) {
743 // Note that Value must be in range here (less than 32) because
744 // otherwise there would not be any bits set in InsertMask.
745 Op1
= Op1
.getOperand(0).getOperand(0);
746 SH
= (SHOpc
== ISD::SHL
) ? Value
: 32 - Value
;
751 SDValue Ops
[] = { Op0
, Op1
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
753 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
760 // Predict the number of instructions that would be generated by calling
762 static unsigned selectI64ImmInstrCountDirect(int64_t Imm
) {
763 // Assume no remaining bits.
764 unsigned Remainder
= 0;
765 // Assume no shift required.
768 // If it can't be represented as a 32 bit value.
769 if (!isInt
<32>(Imm
)) {
770 Shift
= countTrailingZeros
<uint64_t>(Imm
);
771 int64_t ImmSh
= static_cast<uint64_t>(Imm
) >> Shift
;
773 // If the shifted value fits 32 bits.
774 if (isInt
<32>(ImmSh
)) {
775 // Go with the shifted value.
778 // Still stuck with a 64 bit value.
785 // Intermediate operand.
788 // Handle first 32 bits.
789 unsigned Lo
= Imm
& 0xFFFF;
792 if (isInt
<16>(Imm
)) {
796 // Handle the Hi bits and Lo bits.
803 // If no shift, we're done.
804 if (!Shift
) return Result
;
806 // If Hi word == Lo word,
807 // we can use rldimi to insert the Lo word into Hi word.
808 if ((unsigned)(Imm
& 0xFFFFFFFF) == Remainder
) {
813 // Shift for next step if the upper 32-bits were not zero.
817 // Add in the last bits as required.
818 if ((Remainder
>> 16) & 0xFFFF)
820 if (Remainder
& 0xFFFF)
// Rot64 - Rotate a 64-bit value left by R bits.  R must be in [1, 63]
// (R == 0 would shift by 64, which is undefined behavior); callers only
// pass r in that range.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  return (Imm << R) | (Imm >> (64 - R));
}
830 static unsigned selectI64ImmInstrCount(int64_t Imm
) {
831 unsigned Count
= selectI64ImmInstrCountDirect(Imm
);
833 // If the instruction count is 1 or 2, we do not need further analysis
834 // since rotate + load constant requires at least 2 instructions.
838 for (unsigned r
= 1; r
< 63; ++r
) {
839 uint64_t RImm
= Rot64(Imm
, r
);
840 unsigned RCount
= selectI64ImmInstrCountDirect(RImm
) + 1;
841 Count
= std::min(Count
, RCount
);
843 // See comments in selectI64Imm for an explanation of the logic below.
844 unsigned LS
= findLastSet(RImm
);
848 uint64_t OnesMask
= -(int64_t) (UINT64_C(1) << (LS
+1));
849 uint64_t RImmWithOnes
= RImm
| OnesMask
;
851 RCount
= selectI64ImmInstrCountDirect(RImmWithOnes
) + 1;
852 Count
= std::min(Count
, RCount
);
858 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
859 // (above) needs to be kept in sync with this function.
860 static SDNode
*selectI64ImmDirect(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
862 // Assume no remaining bits.
863 unsigned Remainder
= 0;
864 // Assume no shift required.
867 // If it can't be represented as a 32 bit value.
868 if (!isInt
<32>(Imm
)) {
869 Shift
= countTrailingZeros
<uint64_t>(Imm
);
870 int64_t ImmSh
= static_cast<uint64_t>(Imm
) >> Shift
;
872 // If the shifted value fits 32 bits.
873 if (isInt
<32>(ImmSh
)) {
874 // Go with the shifted value.
877 // Still stuck with a 64 bit value.
884 // Intermediate operand.
887 // Handle first 32 bits.
888 unsigned Lo
= Imm
& 0xFFFF;
889 unsigned Hi
= (Imm
>> 16) & 0xFFFF;
891 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
892 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
896 if (isInt
<16>(Imm
)) {
897 uint64_t SextImm
= SignExtend64(Lo
, 16);
898 SDValue SDImm
= CurDAG
->getTargetConstant(SextImm
, dl
, MVT::i64
);
900 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, SDImm
);
902 // Handle the Hi bits.
903 unsigned OpC
= Hi
? PPC::LIS8
: PPC::LI8
;
904 Result
= CurDAG
->getMachineNode(OpC
, dl
, MVT::i64
, getI32Imm(Hi
));
906 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
907 SDValue(Result
, 0), getI32Imm(Lo
));
910 Result
= CurDAG
->getMachineNode(PPC::LIS8
, dl
, MVT::i64
, getI32Imm(Hi
));
913 // If no shift, we're done.
914 if (!Shift
) return Result
;
916 // If Hi word == Lo word,
917 // we can use rldimi to insert the Lo word into Hi word.
918 if ((unsigned)(Imm
& 0xFFFFFFFF) == Remainder
) {
920 { SDValue(Result
, 0), SDValue(Result
, 0), getI32Imm(Shift
), getI32Imm(0)};
921 return CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
);
924 // Shift for next step if the upper 32-bits were not zero.
926 Result
= CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
,
929 getI32Imm(63 - Shift
));
932 // Add in the last bits as required.
933 if ((Hi
= (Remainder
>> 16) & 0xFFFF)) {
934 Result
= CurDAG
->getMachineNode(PPC::ORIS8
, dl
, MVT::i64
,
935 SDValue(Result
, 0), getI32Imm(Hi
));
937 if ((Lo
= Remainder
& 0xFFFF)) {
938 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
939 SDValue(Result
, 0), getI32Imm(Lo
));
945 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
947 unsigned Count
= selectI64ImmInstrCountDirect(Imm
);
949 // If the instruction count is 1 or 2, we do not need further analysis
950 // since rotate + load constant requires at least 2 instructions.
952 return selectI64ImmDirect(CurDAG
, dl
, Imm
);
959 for (unsigned r
= 1; r
< 63; ++r
) {
960 uint64_t RImm
= Rot64(Imm
, r
);
961 unsigned RCount
= selectI64ImmInstrCountDirect(RImm
) + 1;
962 if (RCount
< Count
) {
969 // If the immediate to generate has many trailing zeros, it might be
970 // worthwhile to generate a rotated value with too many leading ones
971 // (because that's free with li/lis's sign-extension semantics), and then
972 // mask them off after rotation.
974 unsigned LS
= findLastSet(RImm
);
975 // We're adding (63-LS) higher-order ones, and we expect to mask them off
976 // after performing the inverse rotation by (64-r). So we need that:
977 // 63-LS == 64-r => LS == r-1
981 uint64_t OnesMask
= -(int64_t) (UINT64_C(1) << (LS
+1));
982 uint64_t RImmWithOnes
= RImm
| OnesMask
;
984 RCount
= selectI64ImmInstrCountDirect(RImmWithOnes
) + 1;
985 if (RCount
< Count
) {
988 MatImm
= RImmWithOnes
;
994 return selectI64ImmDirect(CurDAG
, dl
, Imm
);
996 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
997 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
1000 SDValue Val
= SDValue(selectI64ImmDirect(CurDAG
, dl
, MatImm
), 0);
1001 return CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Val
,
1002 getI32Imm(64 - RMin
), getI32Imm(MaskEnd
));
1005 static unsigned allUsesTruncate(SelectionDAG
*CurDAG
, SDNode
*N
) {
1006 unsigned MaxTruncation
= 0;
1007 // Cannot use range-based for loop here as we need the actual use (i.e. we
1008 // need the operand number corresponding to the use). A range-based for
1009 // will unbox the use and provide an SDNode*.
1010 for (SDNode::use_iterator Use
= N
->use_begin(), UseEnd
= N
->use_end();
1011 Use
!= UseEnd
; ++Use
) {
1013 Use
->isMachineOpcode() ? Use
->getMachineOpcode() : Use
->getOpcode();
1017 if (Use
->isMachineOpcode())
1020 std::max(MaxTruncation
, Use
->getValueType(0).getSizeInBits());
1023 if (Use
->isMachineOpcode())
1025 StoreSDNode
*STN
= cast
<StoreSDNode
>(*Use
);
1026 unsigned MemVTSize
= STN
->getMemoryVT().getSizeInBits();
1027 if (MemVTSize
== 64 || Use
.getOperandNo() != 0)
1029 MaxTruncation
= std::max(MaxTruncation
, MemVTSize
);
1036 if (Use
.getOperandNo() != 0)
1038 MaxTruncation
= std::max(MaxTruncation
, 32u);
1044 if (Use
.getOperandNo() != 0)
1046 MaxTruncation
= std::max(MaxTruncation
, 16u);
1052 if (Use
.getOperandNo() != 0)
1054 MaxTruncation
= std::max(MaxTruncation
, 8u);
1058 return MaxTruncation
;
1061 // Select a 64-bit constant.
1062 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, SDNode
*N
) {
1065 // Get 64 bit value.
1066 int64_t Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
1067 if (unsigned MinSize
= allUsesTruncate(CurDAG
, N
)) {
1068 uint64_t SextImm
= SignExtend64(Imm
, MinSize
);
1069 SDValue SDImm
= CurDAG
->getTargetConstant(SextImm
, dl
, MVT::i64
);
1070 if (isInt
<16>(SextImm
))
1071 return CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, SDImm
);
1073 return selectI64Imm(CurDAG
, dl
, Imm
);
1078 class BitPermutationSelector
{
1082 // The bit number in the value, using a convention where bit 0 is the
1083 // lowest-order bit.
1091 ValueBit(SDValue V
, unsigned I
, Kind K
= Variable
)
1092 : V(V
), Idx(I
), K(K
) {}
1093 ValueBit(Kind K
= Variable
)
1094 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX
), K(K
) {}
1096 bool isZero() const {
1097 return K
== ConstZero
;
1100 bool hasValue() const {
1101 return K
== Variable
;
1104 SDValue
getValue() const {
1105 assert(hasValue() && "Cannot get the value of a constant bit");
1109 unsigned getValueBitIndex() const {
1110 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1115 // A bit group has the same underlying value and the same rotate factor.
1119 unsigned StartIdx
, EndIdx
;
1121 // This rotation amount assumes that the lower 32 bits of the quantity are
1122 // replicated in the high 32 bits by the rotation operator (which is done
1123 // by rlwinm and friends in 64-bit mode).
1125 // Did converting to Repl32 == true change the rotation factor? If it did,
1126 // it decreased it by 32.
1128 // Was this group coalesced after setting Repl32 to true?
1129 bool Repl32Coalesced
;
1131 BitGroup(SDValue V
, unsigned R
, unsigned S
, unsigned E
)
1132 : V(V
), RLAmt(R
), StartIdx(S
), EndIdx(E
), Repl32(false), Repl32CR(false),
1133 Repl32Coalesced(false) {
1134 LLVM_DEBUG(dbgs() << "\tbit group for " << V
.getNode() << " RLAmt = " << R
1135 << " [" << S
<< ", " << E
<< "]\n");
1139 // Information on each (Value, RLAmt) pair (like the number of groups
1140 // associated with each) used to choose the lowering method.
1141 struct ValueRotInfo
{
1143 unsigned RLAmt
= std::numeric_limits
<unsigned>::max();
1144 unsigned NumGroups
= 0;
1145 unsigned FirstGroupStartIdx
= std::numeric_limits
<unsigned>::max();
1146 bool Repl32
= false;
1148 ValueRotInfo() = default;
1150 // For sorting (in reverse order) by NumGroups, and then by
1151 // FirstGroupStartIdx.
1152 bool operator < (const ValueRotInfo
&Other
) const {
1153 // We need to sort so that the non-Repl32 come first because, when we're
1154 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1155 // masking operation.
1156 if (Repl32
< Other
.Repl32
)
1158 else if (Repl32
> Other
.Repl32
)
1160 else if (NumGroups
> Other
.NumGroups
)
1162 else if (NumGroups
< Other
.NumGroups
)
1164 else if (RLAmt
== 0 && Other
.RLAmt
!= 0)
1166 else if (RLAmt
!= 0 && Other
.RLAmt
== 0)
1168 else if (FirstGroupStartIdx
< Other
.FirstGroupStartIdx
)
1174 using ValueBitsMemoizedValue
= std::pair
<bool, SmallVector
<ValueBit
, 64>>;
1175 using ValueBitsMemoizer
=
1176 DenseMap
<SDValue
, std::unique_ptr
<ValueBitsMemoizedValue
>>;
1177 ValueBitsMemoizer Memoizer
;
1179 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1180 // The bool is true if something interesting was deduced, otherwise if we're
1181 // providing only a generic representation of V (or something else likewise
1182 // uninteresting for instruction selection) through the SmallVector.
1183 std::pair
<bool, SmallVector
<ValueBit
, 64> *> getValueBits(SDValue V
,
1185 auto &ValueEntry
= Memoizer
[V
];
1187 return std::make_pair(ValueEntry
->first
, &ValueEntry
->second
);
1188 ValueEntry
.reset(new ValueBitsMemoizedValue());
1189 bool &Interesting
= ValueEntry
->first
;
1190 SmallVector
<ValueBit
, 64> &Bits
= ValueEntry
->second
;
1191 Bits
.resize(NumBits
);
1193 switch (V
.getOpcode()) {
1196 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1197 unsigned RotAmt
= V
.getConstantOperandVal(1);
1199 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1201 for (unsigned i
= 0; i
< NumBits
; ++i
)
1202 Bits
[i
] = LHSBits
[i
< RotAmt
? i
+ (NumBits
- RotAmt
) : i
- RotAmt
];
1204 return std::make_pair(Interesting
= true, &Bits
);
1208 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1209 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1211 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1213 for (unsigned i
= ShiftAmt
; i
< NumBits
; ++i
)
1214 Bits
[i
] = LHSBits
[i
- ShiftAmt
];
1216 for (unsigned i
= 0; i
< ShiftAmt
; ++i
)
1217 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1219 return std::make_pair(Interesting
= true, &Bits
);
1223 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1224 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1226 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1228 for (unsigned i
= 0; i
< NumBits
- ShiftAmt
; ++i
)
1229 Bits
[i
] = LHSBits
[i
+ ShiftAmt
];
1231 for (unsigned i
= NumBits
- ShiftAmt
; i
< NumBits
; ++i
)
1232 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1234 return std::make_pair(Interesting
= true, &Bits
);
1238 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1239 uint64_t Mask
= V
.getConstantOperandVal(1);
1241 const SmallVector
<ValueBit
, 64> *LHSBits
;
1242 // Mark this as interesting, only if the LHS was also interesting. This
1243 // prevents the overall procedure from matching a single immediate 'and'
1244 // (which is non-optimal because such an and might be folded with other
1245 // things if we don't select it here).
1246 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0), NumBits
);
1248 for (unsigned i
= 0; i
< NumBits
; ++i
)
1249 if (((Mask
>> i
) & 1) == 1)
1250 Bits
[i
] = (*LHSBits
)[i
];
1252 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1254 return std::make_pair(Interesting
, &Bits
);
1258 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1259 const auto &RHSBits
= *getValueBits(V
.getOperand(1), NumBits
).second
;
1261 bool AllDisjoint
= true;
1262 for (unsigned i
= 0; i
< NumBits
; ++i
)
1263 if (LHSBits
[i
].isZero())
1264 Bits
[i
] = RHSBits
[i
];
1265 else if (RHSBits
[i
].isZero())
1266 Bits
[i
] = LHSBits
[i
];
1268 AllDisjoint
= false;
1275 return std::make_pair(Interesting
= true, &Bits
);
1277 case ISD::ZERO_EXTEND
: {
1278 // We support only the case with zero extension from i32 to i64 so far.
1279 if (V
.getValueType() != MVT::i64
||
1280 V
.getOperand(0).getValueType() != MVT::i32
)
1283 const SmallVector
<ValueBit
, 64> *LHSBits
;
1284 const unsigned NumOperandBits
= 32;
1285 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0),
1288 for (unsigned i
= 0; i
< NumOperandBits
; ++i
)
1289 Bits
[i
] = (*LHSBits
)[i
];
1291 for (unsigned i
= NumOperandBits
; i
< NumBits
; ++i
)
1292 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1294 return std::make_pair(Interesting
, &Bits
);
1298 for (unsigned i
= 0; i
< NumBits
; ++i
)
1299 Bits
[i
] = ValueBit(V
, i
);
1301 return std::make_pair(Interesting
= false, &Bits
);
1304 // For each value (except the constant ones), compute the left-rotate amount
1305 // to get it from its original to final position.
1306 void computeRotationAmounts() {
1308 RLAmt
.resize(Bits
.size());
1309 for (unsigned i
= 0; i
< Bits
.size(); ++i
)
1310 if (Bits
[i
].hasValue()) {
1311 unsigned VBI
= Bits
[i
].getValueBitIndex();
1315 RLAmt
[i
] = Bits
.size() - (VBI
- i
);
1316 } else if (Bits
[i
].isZero()) {
1318 RLAmt
[i
] = UINT32_MAX
;
1320 llvm_unreachable("Unknown value bit type");
1324 // Collect groups of consecutive bits with the same underlying value and
1325 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1326 // they break up groups.
1327 void collectBitGroups(bool LateMask
) {
1330 unsigned LastRLAmt
= RLAmt
[0];
1331 SDValue LastValue
= Bits
[0].hasValue() ? Bits
[0].getValue() : SDValue();
1332 unsigned LastGroupStartIdx
= 0;
1333 for (unsigned i
= 1; i
< Bits
.size(); ++i
) {
1334 unsigned ThisRLAmt
= RLAmt
[i
];
1335 SDValue ThisValue
= Bits
[i
].hasValue() ? Bits
[i
].getValue() : SDValue();
1336 if (LateMask
&& !ThisValue
) {
1337 ThisValue
= LastValue
;
1338 ThisRLAmt
= LastRLAmt
;
1339 // If we're doing late masking, then the first bit group always starts
1340 // at zero (even if the first bits were zero).
1341 if (BitGroups
.empty())
1342 LastGroupStartIdx
= 0;
1345 // If this bit has the same underlying value and the same rotate factor as
1346 // the last one, then they're part of the same group.
1347 if (ThisRLAmt
== LastRLAmt
&& ThisValue
== LastValue
)
1350 if (LastValue
.getNode())
1351 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1353 LastRLAmt
= ThisRLAmt
;
1354 LastValue
= ThisValue
;
1355 LastGroupStartIdx
= i
;
1357 if (LastValue
.getNode())
1358 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1361 if (BitGroups
.empty())
1364 // We might be able to combine the first and last groups.
1365 if (BitGroups
.size() > 1) {
1366 // If the first and last groups are the same, then remove the first group
1367 // in favor of the last group, making the ending index of the last group
1368 // equal to the ending index of the to-be-removed first group.
1369 if (BitGroups
[0].StartIdx
== 0 &&
1370 BitGroups
[BitGroups
.size()-1].EndIdx
== Bits
.size()-1 &&
1371 BitGroups
[0].V
== BitGroups
[BitGroups
.size()-1].V
&&
1372 BitGroups
[0].RLAmt
== BitGroups
[BitGroups
.size()-1].RLAmt
) {
1373 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1374 BitGroups
[BitGroups
.size()-1].EndIdx
= BitGroups
[0].EndIdx
;
1375 BitGroups
.erase(BitGroups
.begin());
1380 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1381 // associated with each. If the number of groups are same, we prefer a group
1382 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1383 // instruction. If there is a degeneracy, pick the one that occurs
1384 // first (in the final value).
1385 void collectValueRotInfo() {
1388 for (auto &BG
: BitGroups
) {
1389 unsigned RLAmtKey
= BG
.RLAmt
+ (BG
.Repl32
? 64 : 0);
1390 ValueRotInfo
&VRI
= ValueRots
[std::make_pair(BG
.V
, RLAmtKey
)];
1392 VRI
.RLAmt
= BG
.RLAmt
;
1393 VRI
.Repl32
= BG
.Repl32
;
1395 VRI
.FirstGroupStartIdx
= std::min(VRI
.FirstGroupStartIdx
, BG
.StartIdx
);
1398 // Now that we've collected the various ValueRotInfo instances, we need to
1400 ValueRotsVec
.clear();
1401 for (auto &I
: ValueRots
) {
1402 ValueRotsVec
.push_back(I
.second
);
1404 llvm::sort(ValueRotsVec
.begin(), ValueRotsVec
.end());
1407 // In 64-bit mode, rlwinm and friends have a rotation operator that
1408 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1409 // indices of these instructions can only be in the lower 32 bits, so they
1410 // can only represent some 64-bit bit groups. However, when they can be used,
1411 // the 32-bit replication can be used to represent, as a single bit group,
1412 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1413 // groups when possible. Returns true if any of the bit groups were
1415 void assignRepl32BitGroups() {
1416 // If we have bits like this:
1418 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1419 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1420 // Groups: | RLAmt = 8 | RLAmt = 40 |
1422 // But, making use of a 32-bit operation that replicates the low-order 32
1423 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1426 auto IsAllLow32
= [this](BitGroup
& BG
) {
1427 if (BG
.StartIdx
<= BG
.EndIdx
) {
1428 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
) {
1429 if (!Bits
[i
].hasValue())
1431 if (Bits
[i
].getValueBitIndex() >= 32)
1435 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
) {
1436 if (!Bits
[i
].hasValue())
1438 if (Bits
[i
].getValueBitIndex() >= 32)
1441 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
) {
1442 if (!Bits
[i
].hasValue())
1444 if (Bits
[i
].getValueBitIndex() >= 32)
1452 for (auto &BG
: BitGroups
) {
1453 // If this bit group has RLAmt of 0 and will not be merged with
1454 // another bit group, we don't benefit from Repl32. We don't mark
1455 // such group to give more freedom for later instruction selection.
1456 if (BG
.RLAmt
== 0) {
1457 auto PotentiallyMerged
= [this](BitGroup
& BG
) {
1458 for (auto &BG2
: BitGroups
)
1459 if (&BG
!= &BG2
&& BG
.V
== BG2
.V
&&
1460 (BG2
.RLAmt
== 0 || BG2
.RLAmt
== 32))
1464 if (!PotentiallyMerged(BG
))
1467 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32) {
1468 if (IsAllLow32(BG
)) {
1469 if (BG
.RLAmt
>= 32) {
1476 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
1477 << BG
.V
.getNode() << " RLAmt = " << BG
.RLAmt
<< " ["
1478 << BG
.StartIdx
<< ", " << BG
.EndIdx
<< "]\n");
1483 // Now walk through the bit groups, consolidating where possible.
1484 for (auto I
= BitGroups
.begin(); I
!= BitGroups
.end();) {
1485 // We might want to remove this bit group by merging it with the previous
1486 // group (which might be the ending group).
1487 auto IP
= (I
== BitGroups
.begin()) ?
1488 std::prev(BitGroups
.end()) : std::prev(I
);
1489 if (I
->Repl32
&& IP
->Repl32
&& I
->V
== IP
->V
&& I
->RLAmt
== IP
->RLAmt
&&
1490 I
->StartIdx
== (IP
->EndIdx
+ 1) % 64 && I
!= IP
) {
1492 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
1493 << I
->V
.getNode() << " RLAmt = " << I
->RLAmt
<< " ["
1494 << I
->StartIdx
<< ", " << I
->EndIdx
1495 << "] with group with range [" << IP
->StartIdx
<< ", "
1496 << IP
->EndIdx
<< "]\n");
1498 IP
->EndIdx
= I
->EndIdx
;
1499 IP
->Repl32CR
= IP
->Repl32CR
|| I
->Repl32CR
;
1500 IP
->Repl32Coalesced
= true;
1501 I
= BitGroups
.erase(I
);
1504 // There is a special case worth handling: If there is a single group
1505 // covering the entire upper 32 bits, and it can be merged with both
1506 // the next and previous groups (which might be the same group), then
1507 // do so. If it is the same group (so there will be only one group in
1508 // total), then we need to reverse the order of the range so that it
1509 // covers the entire 64 bits.
1510 if (I
->StartIdx
== 32 && I
->EndIdx
== 63) {
1511 assert(std::next(I
) == BitGroups
.end() &&
1512 "bit group ends at index 63 but there is another?");
1513 auto IN
= BitGroups
.begin();
1515 if (IP
->Repl32
&& IN
->Repl32
&& I
->V
== IP
->V
&& I
->V
== IN
->V
&&
1516 (I
->RLAmt
% 32) == IP
->RLAmt
&& (I
->RLAmt
% 32) == IN
->RLAmt
&&
1517 IP
->EndIdx
== 31 && IN
->StartIdx
== 0 && I
!= IP
&&
1520 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I
->V
.getNode()
1521 << " RLAmt = " << I
->RLAmt
<< " [" << I
->StartIdx
1522 << ", " << I
->EndIdx
1523 << "] with 32-bit replicated groups with ranges ["
1524 << IP
->StartIdx
<< ", " << IP
->EndIdx
<< "] and ["
1525 << IN
->StartIdx
<< ", " << IN
->EndIdx
<< "]\n");
1528 // There is only one other group; change it to cover the whole
1529 // range (backward, so that it can still be Repl32 but cover the
1530 // whole 64-bit range).
1533 IP
->Repl32CR
= IP
->Repl32CR
|| I
->RLAmt
>= 32;
1534 IP
->Repl32Coalesced
= true;
1535 I
= BitGroups
.erase(I
);
1537 // There are two separate groups, one before this group and one
1538 // after us (at the beginning). We're going to remove this group,
1539 // but also the group at the very beginning.
1540 IP
->EndIdx
= IN
->EndIdx
;
1541 IP
->Repl32CR
= IP
->Repl32CR
|| IN
->Repl32CR
|| I
->RLAmt
>= 32;
1542 IP
->Repl32Coalesced
= true;
1543 I
= BitGroups
.erase(I
);
1544 BitGroups
.erase(BitGroups
.begin());
1547 // This must be the last group in the vector (and we might have
1548 // just invalidated the iterator above), so break here.
1558 SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
1559 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
1562 uint64_t getZerosMask() {
1564 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1565 if (Bits
[i
].hasValue())
1567 Mask
|= (UINT64_C(1) << i
);
1573 // This method extends an input value to 64 bit if input is 32-bit integer.
1574 // While selecting instructions in BitPermutationSelector in 64-bit mode,
1575 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1576 // In such case, we extend it to 64 bit to be consistent with other values.
1577 SDValue
ExtendToInt64(SDValue V
, const SDLoc
&dl
) {
1578 if (V
.getValueSizeInBits() == 64)
1581 assert(V
.getValueSizeInBits() == 32);
1582 SDValue SubRegIdx
= CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
1583 SDValue ImDef
= SDValue(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
,
1585 SDValue ExtVal
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
,
1591 // Depending on the number of groups for a particular value, it might be
1592 // better to rotate, mask explicitly (using andi/andis), and then or the
1593 // result. Select this part of the result first.
1594 void SelectAndParts32(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
1595 if (BPermRewriterNoMasking
)
1598 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
1600 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1601 if (!Bits
[i
].hasValue() || Bits
[i
].getValue() != VRI
.V
)
1603 if (RLAmt
[i
] != VRI
.RLAmt
)
1608 // Compute the masks for andi/andis that would be necessary.
1609 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
1610 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1611 "No set bits in mask for value bit groups");
1612 bool NeedsRotate
= VRI
.RLAmt
!= 0;
1614 // We're trying to minimize the number of instructions. If we have one
1615 // group, using one of andi/andis can break even. If we have three
1616 // groups, we can use both andi and andis and break even (to use both
1617 // andi and andis we also need to or the results together). We need four
1618 // groups if we also need to rotate. To use andi/andis we need to do more
1619 // than break even because rotate-and-mask instructions tend to be easier
1622 // FIXME: We've biased here against using andi/andis, which is right for
1623 // POWER cores, but not optimal everywhere. For example, on the A2,
1624 // andi/andis have single-cycle latency whereas the rotate-and-mask
1625 // instructions take two cycles, and it would be better to bias toward
1626 // andi/andis in break-even cases.
1628 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
1629 (unsigned) (ANDIMask
!= 0) +
1630 (unsigned) (ANDISMask
!= 0) +
1631 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0) +
1632 (unsigned) (bool) Res
;
1634 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode()
1635 << " RL: " << VRI
.RLAmt
<< ":"
1636 << "\n\t\t\tisel using masking: " << NumAndInsts
1637 << " using rotates: " << VRI
.NumGroups
<< "\n");
1639 if (NumAndInsts
>= VRI
.NumGroups
)
1642 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1644 if (InstCnt
) *InstCnt
+= NumAndInsts
;
1649 { VRI
.V
, getI32Imm(VRI
.RLAmt
, dl
), getI32Imm(0, dl
),
1650 getI32Imm(31, dl
) };
1651 VRot
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
1657 SDValue ANDIVal
, ANDISVal
;
1659 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo
, dl
, MVT::i32
,
1660 VRot
, getI32Imm(ANDIMask
, dl
)), 0);
1662 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo
, dl
, MVT::i32
,
1663 VRot
, getI32Imm(ANDISMask
, dl
)), 0);
1667 TotalVal
= ANDISVal
;
1671 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1672 ANDIVal
, ANDISVal
), 0);
1677 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1680 // Now, remove all groups with this underlying value and rotation
1682 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1683 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
1688 // Instruction selection for the 32-bit case.
1689 SDNode
*Select32(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
1693 if (InstCnt
) *InstCnt
= 0;
1695 // Take care of cases that should use andi/andis first.
1696 SelectAndParts32(dl
, Res
, InstCnt
);
1698 // If we've not yet selected a 'starting' instruction, and we have no zeros
1699 // to fill in, select the (Value, RLAmt) with the highest priority (largest
1700 // number of groups), and start with this rotated value.
1701 if ((!HasZeros
|| LateMask
) && !Res
) {
1702 ValueRotInfo
&VRI
= ValueRotsVec
[0];
1704 if (InstCnt
) *InstCnt
+= 1;
1706 { VRI
.V
, getI32Imm(VRI
.RLAmt
, dl
), getI32Imm(0, dl
),
1707 getI32Imm(31, dl
) };
1708 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
),
1714 // Now, remove all groups with this underlying value and rotation factor.
1715 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1716 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
1720 if (InstCnt
) *InstCnt
+= BitGroups
.size();
1722 // Insert the other groups (one at a time).
1723 for (auto &BG
: BitGroups
) {
1726 { BG
.V
, getI32Imm(BG
.RLAmt
, dl
),
1727 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
1728 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
1729 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
1732 { Res
, BG
.V
, getI32Imm(BG
.RLAmt
, dl
),
1733 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
1734 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
1735 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
), 0);
1740 unsigned Mask
= (unsigned) getZerosMask();
1742 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
1743 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1744 "No set bits in zeros mask?");
1746 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
1747 (unsigned) (ANDISMask
!= 0) +
1748 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
1750 SDValue ANDIVal
, ANDISVal
;
1752 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo
, dl
, MVT::i32
,
1753 Res
, getI32Imm(ANDIMask
, dl
)), 0);
1755 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo
, dl
, MVT::i32
,
1756 Res
, getI32Imm(ANDISMask
, dl
)), 0);
1763 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1764 ANDIVal
, ANDISVal
), 0);
1767 return Res
.getNode();
1770 unsigned SelectRotMask64Count(unsigned RLAmt
, bool Repl32
,
1771 unsigned MaskStart
, unsigned MaskEnd
,
1773 // In the notation used by the instructions, 'start' and 'end' are reversed
1774 // because bits are counted from high to low order.
1775 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1776 InstMaskEnd
= 64 - MaskStart
- 1;
1781 if ((!IsIns
&& (InstMaskEnd
== 63 || InstMaskStart
== 0)) ||
1782 InstMaskEnd
== 63 - RLAmt
)
1788 // For 64-bit values, not all combinations of rotates and masks are
1789 // available. Produce one if it is available.
1790 SDValue
SelectRotMask64(SDValue V
, const SDLoc
&dl
, unsigned RLAmt
,
1791 bool Repl32
, unsigned MaskStart
, unsigned MaskEnd
,
1792 unsigned *InstCnt
= nullptr) {
1793 // In the notation used by the instructions, 'start' and 'end' are reversed
1794 // because bits are counted from high to low order.
1795 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1796 InstMaskEnd
= 64 - MaskStart
- 1;
1798 if (InstCnt
) *InstCnt
+= 1;
1801 // This rotation amount assumes that the lower 32 bits of the quantity
1802 // are replicated in the high 32 bits by the rotation operator (which is
1803 // done by rlwinm and friends).
1804 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
1805 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
1807 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1808 getI32Imm(InstMaskStart
- 32, dl
), getI32Imm(InstMaskEnd
- 32, dl
) };
1809 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM8
, dl
, MVT::i64
,
1813 if (InstMaskEnd
== 63) {
1815 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1816 getI32Imm(InstMaskStart
, dl
) };
1817 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Ops
), 0);
1820 if (InstMaskStart
== 0) {
1822 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1823 getI32Imm(InstMaskEnd
, dl
) };
1824 return SDValue(CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Ops
), 0);
1827 if (InstMaskEnd
== 63 - RLAmt
) {
1829 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1830 getI32Imm(InstMaskStart
, dl
) };
1831 return SDValue(CurDAG
->getMachineNode(PPC::RLDIC
, dl
, MVT::i64
, Ops
), 0);
1834 // We cannot do this with a single instruction, so we'll use two. The
1835 // problem is that we're not free to choose both a rotation amount and mask
1836 // start and end independently. We can choose an arbitrary mask start and
1837 // end, but then the rotation amount is fixed. Rotation, however, can be
1838 // inverted, and so by applying an "inverse" rotation first, we can get the
1840 if (InstCnt
) *InstCnt
+= 1;
1842 // The rotation mask for the second instruction must be MaskStart.
1843 unsigned RLAmt2
= MaskStart
;
1844 // The first instruction must rotate V so that the overall rotation amount
1846 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
1848 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
1849 return SelectRotMask64(V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
1852 // For 64-bit values, not all combinations of rotates and masks are
1853 // available. Produce a rotate-mask-and-insert if one is available.
1854 SDValue
SelectRotMaskIns64(SDValue Base
, SDValue V
, const SDLoc
&dl
,
1855 unsigned RLAmt
, bool Repl32
, unsigned MaskStart
,
1856 unsigned MaskEnd
, unsigned *InstCnt
= nullptr) {
1857 // In the notation used by the instructions, 'start' and 'end' are reversed
1858 // because bits are counted from high to low order.
1859 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1860 InstMaskEnd
= 64 - MaskStart
- 1;
1862 if (InstCnt
) *InstCnt
+= 1;
1865 // This rotation amount assumes that the lower 32 bits of the quantity
1866 // are replicated in the high 32 bits by the rotation operator (which is
1867 // done by rlwinm and friends).
1868 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
1869 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
1871 { ExtendToInt64(Base
, dl
), ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1872 getI32Imm(InstMaskStart
- 32, dl
), getI32Imm(InstMaskEnd
- 32, dl
) };
1873 return SDValue(CurDAG
->getMachineNode(PPC::RLWIMI8
, dl
, MVT::i64
,
1877 if (InstMaskEnd
== 63 - RLAmt
) {
1879 { ExtendToInt64(Base
, dl
), ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1880 getI32Imm(InstMaskStart
, dl
) };
1881 return SDValue(CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
), 0);
1884 // We cannot do this with a single instruction, so we'll use two. The
1885 // problem is that we're not free to choose both a rotation amount and mask
1886 // start and end independently. We can choose an arbitrary mask start and
1887 // end, but then the rotation amount is fixed. Rotation, however, can be
1888 // inverted, and so by applying an "inverse" rotation first, we can get the
1890 if (InstCnt
) *InstCnt
+= 1;
1892 // The rotation mask for the second instruction must be MaskStart.
1893 unsigned RLAmt2
= MaskStart
;
1894 // The first instruction must rotate V so that the overall rotation amount
1896 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
1898 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
1899 return SelectRotMaskIns64(Base
, V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
1902 void SelectAndParts64(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
1903 if (BPermRewriterNoMasking
)
1906 // The idea here is the same as in the 32-bit version, but with additional
1907 // complications from the fact that Repl32 might be true. Because we
1908 // aggressively convert bit groups to Repl32 form (which, for small
1909 // rotation factors, involves no other change), and then coalesce, it might
1910 // be the case that a single 64-bit masking operation could handle both
1911 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
1912 // form allowed coalescing, then we must use a 32-bit rotaton in order to
1913 // completely capture the new combined bit group.
1915 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
1918 // We need to add to the mask all bits from the associated bit groups.
1919 // If Repl32 is false, we need to add bits from bit groups that have
1920 // Repl32 true, but are trivially convertable to Repl32 false. Such a
1921 // group is trivially convertable if it overlaps only with the lower 32
1922 // bits, and the group has not been coalesced.
1923 auto MatchingBG
= [VRI
](const BitGroup
&BG
) {
1927 unsigned EffRLAmt
= BG
.RLAmt
;
1928 if (!VRI
.Repl32
&& BG
.Repl32
) {
1929 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
<= BG
.EndIdx
&&
1930 !BG
.Repl32Coalesced
) {
1936 } else if (VRI
.Repl32
!= BG
.Repl32
) {
1940 return VRI
.RLAmt
== EffRLAmt
;
1943 for (auto &BG
: BitGroups
) {
1944 if (!MatchingBG(BG
))
1947 if (BG
.StartIdx
<= BG
.EndIdx
) {
1948 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
)
1949 Mask
|= (UINT64_C(1) << i
);
1951 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
)
1952 Mask
|= (UINT64_C(1) << i
);
1953 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
)
1954 Mask
|= (UINT64_C(1) << i
);
1958 // We can use the 32-bit andi/andis technique if the mask does not
1959 // require any higher-order bits. This can save an instruction compared
1960 // to always using the general 64-bit technique.
1961 bool Use32BitInsts
= isUInt
<32>(Mask
);
1962 // Compute the masks for andi/andis that would be necessary.
1963 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
1964 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
1966 bool NeedsRotate
= VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
));
1968 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
1969 (unsigned) (bool) Res
;
1971 NumAndInsts
+= (unsigned) (ANDIMask
!= 0) + (unsigned) (ANDISMask
!= 0) +
1972 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
1974 NumAndInsts
+= selectI64ImmInstrCount(Mask
) + /* and */ 1;
1976 unsigned NumRLInsts
= 0;
1977 bool FirstBG
= true;
1978 bool MoreBG
= false;
1979 for (auto &BG
: BitGroups
) {
1980 if (!MatchingBG(BG
)) {
1985 SelectRotMask64Count(BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
, BG
.EndIdx
,
1990 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode()
1991 << " RL: " << VRI
.RLAmt
<< (VRI
.Repl32
? " (32):" : ":")
1992 << "\n\t\t\tisel using masking: " << NumAndInsts
1993 << " using rotates: " << NumRLInsts
<< "\n");
1995 // When we'd use andi/andis, we bias toward using the rotates (andi only
1996 // has a record form, and is cracked on POWER cores). However, when using
1997 // general 64-bit constant formation, bias toward the constant form,
1998 // because that exposes more opportunities for CSE.
1999 if (NumAndInsts
> NumRLInsts
)
2001 // When merging multiple bit groups, an OR instruction is used.
2002 // But when a rotate is used, rldimi can insert the rotated value into any
2003 // register, so the OR instruction can be avoided.
2004 if ((Use32BitInsts
|| MoreBG
) && NumAndInsts
== NumRLInsts
)
2007 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2009 if (InstCnt
) *InstCnt
+= NumAndInsts
;
2012 // We actually need to generate a rotation if we have a non-zero rotation
2013 // factor or, in the Repl32 case, if we care about any of the
2014 // higher-order replicated bits. In the latter case, we generate a mask
2015 // backward so that it actually includes the entire 64 bits.
2016 if (VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
)))
2017 VRot
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
2018 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63);
2023 if (Use32BitInsts
) {
2024 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2025 "No set bits in mask when using 32-bit ands for 64-bit value");
2027 SDValue ANDIVal
, ANDISVal
;
2029 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo8
, dl
, MVT::i64
,
2030 ExtendToInt64(VRot
, dl
),
2031 getI32Imm(ANDIMask
, dl
)),
2034 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo8
, dl
, MVT::i64
,
2035 ExtendToInt64(VRot
, dl
),
2036 getI32Imm(ANDISMask
, dl
)),
2040 TotalVal
= ANDISVal
;
2044 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2045 ExtendToInt64(ANDIVal
, dl
), ANDISVal
), 0);
2047 TotalVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
2049 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
2050 ExtendToInt64(VRot
, dl
), TotalVal
),
2057 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2058 ExtendToInt64(Res
, dl
), TotalVal
),
2061 // Now, remove all groups with this underlying value and rotation
2063 eraseMatchingBitGroups(MatchingBG
);
2067 // Instruction selection for the 64-bit case.
2068 SDNode
*Select64(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
2072 if (InstCnt
) *InstCnt
= 0;
2074 // Take care of cases that should use andi/andis first.
2075 SelectAndParts64(dl
, Res
, InstCnt
);
2077 // If we've not yet selected a 'starting' instruction, and we have no zeros
2078 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2079 // number of groups), and start with this rotated value.
2080 if ((!HasZeros
|| LateMask
) && !Res
) {
2081 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2082 // groups will come first, and so the VRI representing the largest number
2083 // of groups might not be first (it might be the first Repl32 groups).
2084 unsigned MaxGroupsIdx
= 0;
2085 if (!ValueRotsVec
[0].Repl32
) {
2086 for (unsigned i
= 0, ie
= ValueRotsVec
.size(); i
< ie
; ++i
)
2087 if (ValueRotsVec
[i
].Repl32
) {
2088 if (ValueRotsVec
[i
].NumGroups
> ValueRotsVec
[0].NumGroups
)
2094 ValueRotInfo
&VRI
= ValueRotsVec
[MaxGroupsIdx
];
2095 bool NeedsRotate
= false;
2098 } else if (VRI
.Repl32
) {
2099 for (auto &BG
: BitGroups
) {
2100 if (BG
.V
!= VRI
.V
|| BG
.RLAmt
!= VRI
.RLAmt
||
2101 BG
.Repl32
!= VRI
.Repl32
)
2104 // We don't need a rotate if the bit group is confined to the lower
2106 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
< BG
.EndIdx
)
2115 Res
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
2116 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63,
2121 // Now, remove all groups with this underlying value and rotation factor.
2123 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
2124 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
&&
2125 BG
.Repl32
== VRI
.Repl32
;
2129 // Because 64-bit rotates are more flexible than inserts, we might have a
2130 // preference regarding which one we do first (to save one instruction).
2132 for (auto I
= BitGroups
.begin(), IE
= BitGroups
.end(); I
!= IE
; ++I
) {
2133 if (SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
2135 SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
2137 if (I
!= BitGroups
.begin()) {
2140 BitGroups
.insert(BitGroups
.begin(), BG
);
2147 // Insert the other groups (one at a time).
2148 for (auto &BG
: BitGroups
) {
2150 Res
= SelectRotMask64(BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
,
2151 BG
.EndIdx
, InstCnt
);
2153 Res
= SelectRotMaskIns64(Res
, BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
,
2154 BG
.StartIdx
, BG
.EndIdx
, InstCnt
);
2158 uint64_t Mask
= getZerosMask();
2160 // We can use the 32-bit andi/andis technique if the mask does not
2161 // require any higher-order bits. This can save an instruction compared
2162 // to always using the general 64-bit technique.
2163 bool Use32BitInsts
= isUInt
<32>(Mask
);
2164 // Compute the masks for andi/andis that would be necessary.
2165 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
2166 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
2168 if (Use32BitInsts
) {
2169 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2170 "No set bits in mask when using 32-bit ands for 64-bit value");
2172 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
2173 (unsigned) (ANDISMask
!= 0) +
2174 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
2176 SDValue ANDIVal
, ANDISVal
;
2178 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo8
, dl
, MVT::i64
,
2179 ExtendToInt64(Res
, dl
), getI32Imm(ANDIMask
, dl
)), 0);
2181 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo8
, dl
, MVT::i64
,
2182 ExtendToInt64(Res
, dl
), getI32Imm(ANDISMask
, dl
)), 0);
2189 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2190 ExtendToInt64(ANDIVal
, dl
), ANDISVal
), 0);
2192 if (InstCnt
) *InstCnt
+= selectI64ImmInstrCount(Mask
) + /* and */ 1;
2194 SDValue MaskVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
2196 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
2197 ExtendToInt64(Res
, dl
), MaskVal
), 0);
2201 return Res
.getNode();
2204 SDNode
*Select(SDNode
*N
, bool LateMask
, unsigned *InstCnt
= nullptr) {
2205 // Fill in BitGroups.
2206 collectBitGroups(LateMask
);
2207 if (BitGroups
.empty())
2210 // For 64-bit values, figure out when we can use 32-bit instructions.
2211 if (Bits
.size() == 64)
2212 assignRepl32BitGroups();
2214 // Fill in ValueRotsVec.
2215 collectValueRotInfo();
2217 if (Bits
.size() == 32) {
2218 return Select32(N
, LateMask
, InstCnt
);
2220 assert(Bits
.size() == 64 && "Not 64 bits here?");
2221 return Select64(N
, LateMask
, InstCnt
);
2227 void eraseMatchingBitGroups(function_ref
<bool(const BitGroup
&)> F
) {
2228 BitGroups
.erase(remove_if(BitGroups
, F
), BitGroups
.end());
2231 SmallVector
<ValueBit
, 64> Bits
;
2234 SmallVector
<unsigned, 64> RLAmt
;
2236 SmallVector
<BitGroup
, 16> BitGroups
;
2238 DenseMap
<std::pair
<SDValue
, unsigned>, ValueRotInfo
> ValueRots
;
2239 SmallVector
<ValueRotInfo
, 16> ValueRotsVec
;
2241 SelectionDAG
*CurDAG
;
2244 BitPermutationSelector(SelectionDAG
*DAG
)
2247 // Here we try to match complex bit permutations into a set of
2248 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2249 // known to produce optimal code for common cases (like i32 byte swapping).
2250 SDNode
*Select(SDNode
*N
) {
2253 getValueBits(SDValue(N
, 0), N
->getValueType(0).getSizeInBits());
2256 Bits
= std::move(*Result
.second
);
2258 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2259 " selection for: ");
2260 LLVM_DEBUG(N
->dump(CurDAG
));
2262 // Fill in RLAmt and set HasZeros.
2263 computeRotationAmounts();
2266 return Select(N
, false);
2268 // We currently have two techniques for handling results with zeros: early
2269 // masking (the default) and late masking. Late masking is sometimes more
2270 // efficient, but because the structure of the bit groups is different, it
2271 // is hard to tell without generating both and comparing the results. With
2272 // late masking, we ignore zeros in the resulting value when inserting each
2273 // set of bit groups, and then mask in the zeros at the end. With early
2274 // masking, we only insert the non-zero parts of the result at every step.
2276 unsigned InstCnt
= 0, InstCntLateMask
= 0;
2277 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2278 SDNode
*RN
= Select(N
, false, &InstCnt
);
2279 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt
<< " instructions\n");
2281 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2282 SDNode
*RNLM
= Select(N
, true, &InstCntLateMask
);
2283 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2284 << " instructions\n");
2286 if (InstCnt
<= InstCntLateMask
) {
2287 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2291 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2296 class IntegerCompareEliminator
{
2297 SelectionDAG
*CurDAG
;
2299 // Conversion type for interpreting results of a 32-bit instruction as
2300 // a 64-bit value or vice versa.
2301 enum ExtOrTruncConversion
{ Ext
, Trunc
};
2303 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2305 // ZExtOrig - use the original condition code, zero-extend value
2306 // ZExtInvert - invert the condition code, zero-extend value
2307 // SExtOrig - use the original condition code, sign-extend value
2308 // SExtInvert - invert the condition code, sign-extend value
2309 enum SetccInGPROpts
{ ZExtOrig
, ZExtInvert
, SExtOrig
, SExtInvert
};
2311 // Comparisons against zero to emit GPR code sequences for. Each of these
2312 // sequences may need to be emitted for two or more equivalent patterns.
2313 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2314 // matters as well as the extension type: sext (-1/0), zext (1/0).
2315 // GEZExt - (zext (LHS >= 0))
2316 // GESExt - (sext (LHS >= 0))
2317 // LEZExt - (zext (LHS <= 0))
2318 // LESExt - (sext (LHS <= 0))
2319 enum ZeroCompare
{ GEZExt
, GESExt
, LEZExt
, LESExt
};
2321 SDNode
*tryEXTEND(SDNode
*N
);
2322 SDNode
*tryLogicOpOfCompares(SDNode
*N
);
2323 SDValue
computeLogicOpInGPR(SDValue LogicOp
);
2324 SDValue
signExtendInputIfNeeded(SDValue Input
);
2325 SDValue
zeroExtendInputIfNeeded(SDValue Input
);
2326 SDValue
addExtOrTrunc(SDValue NatWidthRes
, ExtOrTruncConversion Conv
);
2327 SDValue
getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
2329 SDValue
get32BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2330 int64_t RHSValue
, SDLoc dl
);
2331 SDValue
get32BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2332 int64_t RHSValue
, SDLoc dl
);
2333 SDValue
get64BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2334 int64_t RHSValue
, SDLoc dl
);
2335 SDValue
get64BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2336 int64_t RHSValue
, SDLoc dl
);
2337 SDValue
getSETCCInGPR(SDValue Compare
, SetccInGPROpts ConvOpts
);
2340 IntegerCompareEliminator(SelectionDAG
*DAG
,
2341 PPCDAGToDAGISel
*Sel
) : CurDAG(DAG
), S(Sel
) {
2342 assert(CurDAG
->getTargetLoweringInfo()
2343 .getPointerTy(CurDAG
->getDataLayout()).getSizeInBits() == 64 &&
2344 "Only expecting to use this on 64 bit targets.");
2346 SDNode
*Select(SDNode
*N
) {
2347 if (CmpInGPR
== ICGPR_None
)
2349 switch (N
->getOpcode()) {
2351 case ISD::ZERO_EXTEND
:
2352 if (CmpInGPR
== ICGPR_Sext
|| CmpInGPR
== ICGPR_SextI32
||
2353 CmpInGPR
== ICGPR_SextI64
)
2356 case ISD::SIGN_EXTEND
:
2357 if (CmpInGPR
== ICGPR_Zext
|| CmpInGPR
== ICGPR_ZextI32
||
2358 CmpInGPR
== ICGPR_ZextI64
)
2360 return tryEXTEND(N
);
2364 return tryLogicOpOfCompares(N
);
2370 static bool isLogicOp(unsigned Opc
) {
2371 return Opc
== ISD::AND
|| Opc
== ISD::OR
|| Opc
== ISD::XOR
;
2373 // The obvious case for wanting to keep the value in a GPR. Namely, the
2374 // result of the comparison is actually needed in a GPR.
2375 SDNode
*IntegerCompareEliminator::tryEXTEND(SDNode
*N
) {
2376 assert((N
->getOpcode() == ISD::ZERO_EXTEND
||
2377 N
->getOpcode() == ISD::SIGN_EXTEND
) &&
2378 "Expecting a zero/sign extend node!");
2380 // If we are zero-extending the result of a logical operation on i1
2381 // values, we can keep the values in GPRs.
2382 if (isLogicOp(N
->getOperand(0).getOpcode()) &&
2383 N
->getOperand(0).getValueType() == MVT::i1
&&
2384 N
->getOpcode() == ISD::ZERO_EXTEND
)
2385 WideRes
= computeLogicOpInGPR(N
->getOperand(0));
2386 else if (N
->getOperand(0).getOpcode() != ISD::SETCC
)
2390 getSETCCInGPR(N
->getOperand(0),
2391 N
->getOpcode() == ISD::SIGN_EXTEND
?
2392 SetccInGPROpts::SExtOrig
: SetccInGPROpts::ZExtOrig
);
2398 bool Input32Bit
= WideRes
.getValueType() == MVT::i32
;
2399 bool Output32Bit
= N
->getValueType(0) == MVT::i32
;
2401 NumSextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 1 : 0;
2402 NumZextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 0 : 1;
2404 SDValue ConvOp
= WideRes
;
2405 if (Input32Bit
!= Output32Bit
)
2406 ConvOp
= addExtOrTrunc(WideRes
, Input32Bit
? ExtOrTruncConversion::Ext
:
2407 ExtOrTruncConversion::Trunc
);
2408 return ConvOp
.getNode();
2411 // Attempt to perform logical operations on the results of comparisons while
2412 // keeping the values in GPRs. Without doing so, these would end up being
2413 // lowered to CR-logical operations which suffer from significant latency and
2415 SDNode
*IntegerCompareEliminator::tryLogicOpOfCompares(SDNode
*N
) {
2416 if (N
->getValueType(0) != MVT::i1
)
2418 assert(isLogicOp(N
->getOpcode()) &&
2419 "Expected a logic operation on setcc results.");
2420 SDValue LoweredLogical
= computeLogicOpInGPR(SDValue(N
, 0));
2421 if (!LoweredLogical
)
2425 bool IsBitwiseNegate
= LoweredLogical
.getMachineOpcode() == PPC::XORI8
;
2426 unsigned SubRegToExtract
= IsBitwiseNegate
? PPC::sub_eq
: PPC::sub_gt
;
2427 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
2428 SDValue LHS
= LoweredLogical
.getOperand(0);
2429 SDValue RHS
= LoweredLogical
.getOperand(1);
2431 SDValue OpToConvToRecForm
;
2433 // Look through any 32-bit to 64-bit implicit extend nodes to find the
2434 // opcode that is input to the XORI.
2435 if (IsBitwiseNegate
&&
2436 LoweredLogical
.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG
)
2437 OpToConvToRecForm
= LoweredLogical
.getOperand(0).getOperand(1);
2438 else if (IsBitwiseNegate
)
2439 // If the input to the XORI isn't an extension, that's what we're after.
2440 OpToConvToRecForm
= LoweredLogical
.getOperand(0);
2442 // If this is not an XORI, it is a reg-reg logical op and we can convert
2443 // it to record-form.
2444 OpToConvToRecForm
= LoweredLogical
;
2446 // Get the record-form version of the node we're looking to use to get the
2448 uint16_t NonRecOpc
= OpToConvToRecForm
.getMachineOpcode();
2449 int NewOpc
= PPCInstrInfo::getRecordFormOpcode(NonRecOpc
);
2451 // Convert the right node to record-form. This is either the logical we're
2452 // looking at or it is the input node to the negation (if we're looking at
2453 // a bitwise negation).
2454 if (NewOpc
!= -1 && IsBitwiseNegate
) {
2455 // The input to the XORI has a record-form. Use it.
2456 assert(LoweredLogical
.getConstantOperandVal(1) == 1 &&
2457 "Expected a PPC::XORI8 only for bitwise negation.");
2458 // Emit the record-form instruction.
2459 std::vector
<SDValue
> Ops
;
2460 for (int i
= 0, e
= OpToConvToRecForm
.getNumOperands(); i
< e
; i
++)
2461 Ops
.push_back(OpToConvToRecForm
.getOperand(i
));
2464 SDValue(CurDAG
->getMachineNode(NewOpc
, dl
,
2465 OpToConvToRecForm
.getValueType(),
2466 MVT::Glue
, Ops
), 0);
2468 assert((NewOpc
!= -1 || !IsBitwiseNegate
) &&
2469 "No record form available for AND8/OR8/XOR8?");
2471 SDValue(CurDAG
->getMachineNode(NewOpc
== -1 ? PPC::ANDIo8
: NewOpc
, dl
,
2472 MVT::i64
, MVT::Glue
, LHS
, RHS
), 0);
2475 // Select this node to a single bit from CR0 set by the record-form node
2476 // just created. For bitwise negation, use the EQ bit which is the equivalent
2477 // of negating the result (i.e. it is a bit set when the result of the
2478 // operation is zero).
2480 CurDAG
->getTargetConstant(SubRegToExtract
, dl
, MVT::i32
);
2482 SDValue(CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
, dl
,
2483 MVT::i1
, CR0Reg
, SRIdxVal
,
2484 WideOp
.getValue(1)), 0);
2485 return CRBit
.getNode();
2488 // Lower a logical operation on i1 values into a GPR sequence if possible.
2489 // The result can be kept in a GPR if requested.
2490 // Three types of inputs can be handled:
2493 // - Logical operation (AND/OR/XOR)
2494 // There is also a special case that is handled (namely a complement operation
2495 // achieved with xor %a, -1).
2496 SDValue
IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp
) {
2497 assert(isLogicOp(LogicOp
.getOpcode()) &&
2498 "Can only handle logic operations here.");
2499 assert(LogicOp
.getValueType() == MVT::i1
&&
2500 "Can only handle logic operations on i1 values here.");
2504 // Special case: xor %a, -1
2505 bool IsBitwiseNegation
= isBitwiseNot(LogicOp
);
2507 // Produces a GPR sequence for each operand of the binary logic operation.
2508 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2509 // the value in a GPR and for logic operations, it will recursively produce
2510 // a GPR sequence for the operation.
2511 auto getLogicOperand
= [&] (SDValue Operand
) -> SDValue
{
2512 unsigned OperandOpcode
= Operand
.getOpcode();
2513 if (OperandOpcode
== ISD::SETCC
)
2514 return getSETCCInGPR(Operand
, SetccInGPROpts::ZExtOrig
);
2515 else if (OperandOpcode
== ISD::TRUNCATE
) {
2516 SDValue InputOp
= Operand
.getOperand(0);
2517 EVT InVT
= InputOp
.getValueType();
2518 return SDValue(CurDAG
->getMachineNode(InVT
== MVT::i32
? PPC::RLDICL_32
:
2519 PPC::RLDICL
, dl
, InVT
, InputOp
,
2520 S
->getI64Imm(0, dl
),
2521 S
->getI64Imm(63, dl
)), 0);
2522 } else if (isLogicOp(OperandOpcode
))
2523 return computeLogicOpInGPR(Operand
);
2526 LHS
= getLogicOperand(LogicOp
.getOperand(0));
2527 RHS
= getLogicOperand(LogicOp
.getOperand(1));
2529 // If a GPR sequence can't be produced for the LHS we can't proceed.
2530 // Not producing a GPR sequence for the RHS is only a problem if this isn't
2531 // a bitwise negation operation.
2532 if (!LHS
|| (!RHS
&& !IsBitwiseNegation
))
2535 NumLogicOpsOnComparison
++;
2537 // We will use the inputs as 64-bit values.
2538 if (LHS
.getValueType() == MVT::i32
)
2539 LHS
= addExtOrTrunc(LHS
, ExtOrTruncConversion::Ext
);
2540 if (!IsBitwiseNegation
&& RHS
.getValueType() == MVT::i32
)
2541 RHS
= addExtOrTrunc(RHS
, ExtOrTruncConversion::Ext
);
2544 switch (LogicOp
.getOpcode()) {
2545 default: llvm_unreachable("Unknown logic operation.");
2546 case ISD::AND
: NewOpc
= PPC::AND8
; break;
2547 case ISD::OR
: NewOpc
= PPC::OR8
; break;
2548 case ISD::XOR
: NewOpc
= PPC::XOR8
; break;
2551 if (IsBitwiseNegation
) {
2552 RHS
= S
->getI64Imm(1, dl
);
2553 NewOpc
= PPC::XORI8
;
2556 return SDValue(CurDAG
->getMachineNode(NewOpc
, dl
, MVT::i64
, LHS
, RHS
), 0);
2560 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2561 /// Otherwise just reinterpret it as a 64-bit value.
2562 /// Useful when emitting comparison code for 32-bit values without using
2563 /// the compare instruction (which only considers the lower 32-bits).
2564 SDValue
IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input
) {
2565 assert(Input
.getValueType() == MVT::i32
&&
2566 "Can only sign-extend 32-bit values here.");
2567 unsigned Opc
= Input
.getOpcode();
2569 // The value was sign extended and then truncated to 32-bits. No need to
2570 // sign extend it again.
2571 if (Opc
== ISD::TRUNCATE
&&
2572 (Input
.getOperand(0).getOpcode() == ISD::AssertSext
||
2573 Input
.getOperand(0).getOpcode() == ISD::SIGN_EXTEND
))
2574 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2576 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
2577 // The input is a sign-extending load. All ppc sign-extending loads
2578 // sign-extend to the full 64-bits.
2579 if (InputLoad
&& InputLoad
->getExtensionType() == ISD::SEXTLOAD
)
2580 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2582 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
2583 // We don't sign-extend constants.
2585 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2588 SignExtensionsAdded
++;
2589 return SDValue(CurDAG
->getMachineNode(PPC::EXTSW_32_64
, dl
,
2590 MVT::i64
, Input
), 0);
2593 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2594 /// Otherwise just reinterpret it as a 64-bit value.
2595 /// Useful when emitting comparison code for 32-bit values without using
2596 /// the compare instruction (which only considers the lower 32-bits).
2597 SDValue
IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input
) {
2598 assert(Input
.getValueType() == MVT::i32
&&
2599 "Can only zero-extend 32-bit values here.");
2600 unsigned Opc
= Input
.getOpcode();
2602 // The only condition under which we can omit the actual extend instruction:
2603 // - The value is a positive constant
2604 // - The value comes from a load that isn't a sign-extending load
2605 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2606 bool IsTruncateOfZExt
= Opc
== ISD::TRUNCATE
&&
2607 (Input
.getOperand(0).getOpcode() == ISD::AssertZext
||
2608 Input
.getOperand(0).getOpcode() == ISD::ZERO_EXTEND
);
2609 if (IsTruncateOfZExt
)
2610 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2612 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
2613 if (InputConst
&& InputConst
->getSExtValue() >= 0)
2614 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2616 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
2617 // The input is a load that doesn't sign-extend (it will be zero-extended).
2618 if (InputLoad
&& InputLoad
->getExtensionType() != ISD::SEXTLOAD
)
2619 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2621 // None of the above, need to zero-extend.
2623 ZeroExtensionsAdded
++;
2624 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL_32_64
, dl
, MVT::i64
, Input
,
2625 S
->getI64Imm(0, dl
),
2626 S
->getI64Imm(32, dl
)), 0);
2629 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2630 // course not actual zero/sign extensions that will generate machine code,
2631 // they're just a way to reinterpret a 32 bit value in a register as a
2632 // 64 bit value and vice-versa.
2633 SDValue
IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes
,
2634 ExtOrTruncConversion Conv
) {
2635 SDLoc
dl(NatWidthRes
);
2637 // For reinterpreting 32-bit values as 64 bit values, we generate
2638 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2639 if (Conv
== ExtOrTruncConversion::Ext
) {
2640 SDValue
ImDef(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
, MVT::i64
), 0);
2642 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
2643 return SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
, MVT::i64
,
2644 ImDef
, NatWidthRes
, SubRegIdx
), 0);
2647 assert(Conv
== ExtOrTruncConversion::Trunc
&&
2648 "Unknown convertion between 32 and 64 bit values.");
2649 // For reinterpreting 64-bit values as 32-bit values, we just need to
2650 // EXTRACT_SUBREG (i.e. extract the low word).
2652 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
2653 return SDValue(CurDAG
->getMachineNode(PPC::EXTRACT_SUBREG
, dl
, MVT::i32
,
2654 NatWidthRes
, SubRegIdx
), 0);
2657 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2658 // Handle both zero-extensions and sign-extensions.
2660 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
2661 ZeroCompare CmpTy
) {
2662 EVT InVT
= LHS
.getValueType();
2663 bool Is32Bit
= InVT
== MVT::i32
;
2666 // Produce the value that needs to be either zero or sign extended.
2668 case ZeroCompare::GEZExt
:
2669 case ZeroCompare::GESExt
:
2670 ToExtend
= SDValue(CurDAG
->getMachineNode(Is32Bit
? PPC::NOR
: PPC::NOR8
,
2671 dl
, InVT
, LHS
, LHS
), 0);
2673 case ZeroCompare::LEZExt
:
2674 case ZeroCompare::LESExt
: {
2676 // Upper 32 bits cannot be undefined for this sequence.
2677 LHS
= signExtendInputIfNeeded(LHS
);
2679 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
2681 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2682 Neg
, S
->getI64Imm(1, dl
),
2683 S
->getI64Imm(63, dl
)), 0);
2686 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
2687 S
->getI64Imm(~0ULL, dl
)), 0);
2688 ToExtend
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2695 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2697 (CmpTy
== ZeroCompare::GEZExt
|| CmpTy
== ZeroCompare::LEZExt
))
2698 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2699 ToExtend
, S
->getI64Imm(1, dl
),
2700 S
->getI64Imm(63, dl
)), 0);
2702 (CmpTy
== ZeroCompare::GESExt
|| CmpTy
== ZeroCompare::LESExt
))
2703 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, ToExtend
,
2704 S
->getI64Imm(63, dl
)), 0);
2706 assert(Is32Bit
&& "Should have handled the 32-bit sequences above.");
2707 // For 32-bit sequences, the extensions differ between GE/LE cases.
2709 case ZeroCompare::GEZExt
: {
2710 SDValue ShiftOps
[] = { ToExtend
, S
->getI32Imm(1, dl
), S
->getI32Imm(31, dl
),
2711 S
->getI32Imm(31, dl
) };
2712 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2715 case ZeroCompare::GESExt
:
2716 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, ToExtend
,
2717 S
->getI32Imm(31, dl
)), 0);
2718 case ZeroCompare::LEZExt
:
2719 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, ToExtend
,
2720 S
->getI32Imm(1, dl
)), 0);
2721 case ZeroCompare::LESExt
:
2722 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, ToExtend
,
2723 S
->getI32Imm(-1, dl
)), 0);
2726 // The above case covers all the enumerators so it can't have a default clause
2727 // to avoid compiler warnings.
2728 llvm_unreachable("Unknown zero-comparison type.");
2731 /// Produces a zero-extended result of comparing two 32-bit values according to
2732 /// the passed condition code.
2734 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS
, SDValue RHS
,
2736 int64_t RHSValue
, SDLoc dl
) {
2737 if (CmpInGPR
== ICGPR_I64
|| CmpInGPR
== ICGPR_SextI64
||
2738 CmpInGPR
== ICGPR_ZextI64
|| CmpInGPR
== ICGPR_Sext
)
2740 bool IsRHSZero
= RHSValue
== 0;
2741 bool IsRHSOne
= RHSValue
== 1;
2742 bool IsRHSNegOne
= RHSValue
== -1LL;
2744 default: return SDValue();
2746 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2747 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
2748 SDValue Xor
= IsRHSZero
? LHS
:
2749 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2751 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2752 SDValue ShiftOps
[] = { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
),
2753 S
->getI32Imm(31, dl
) };
2754 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2758 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2759 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
2760 SDValue Xor
= IsRHSZero
? LHS
:
2761 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2763 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2764 SDValue ShiftOps
[] = { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
),
2765 S
->getI32Imm(31, dl
) };
2767 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
2768 return SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
2769 S
->getI32Imm(1, dl
)), 0);
2772 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2773 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
2775 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
2777 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2778 // by swapping inputs and falling through.
2779 std::swap(LHS
, RHS
);
2780 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2781 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2785 if (CmpInGPR
== ICGPR_NonExtIn
)
2787 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2788 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
2790 if (CmpInGPR
== ICGPR_NonExtIn
)
2792 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
2795 // The upper 32-bits of the register can't be undefined for this sequence.
2796 LHS
= signExtendInputIfNeeded(LHS
);
2797 RHS
= signExtendInputIfNeeded(RHS
);
2799 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
2801 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Sub
,
2802 S
->getI64Imm(1, dl
), S
->getI64Imm(63, dl
)),
2805 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
,
2806 MVT::i64
, Shift
, S
->getI32Imm(1, dl
)), 0);
2809 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2810 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2811 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
2812 // Handle SETLT -1 (which is equivalent to SETGE 0).
2814 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
2817 if (CmpInGPR
== ICGPR_NonExtIn
)
2819 // The upper 32-bits of the register can't be undefined for this sequence.
2820 LHS
= signExtendInputIfNeeded(LHS
);
2821 RHS
= signExtendInputIfNeeded(RHS
);
2823 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
2824 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2825 Neg
, S
->getI32Imm(1, dl
), S
->getI32Imm(63, dl
)), 0);
2827 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2828 // (%b < %a) by swapping inputs and falling through.
2829 std::swap(LHS
, RHS
);
2830 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2831 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2832 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
2836 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2837 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
2838 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
2839 // Handle SETLT 1 (which is equivalent to SETLE 0).
2841 if (CmpInGPR
== ICGPR_NonExtIn
)
2843 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
2847 SDValue ShiftOps
[] = { LHS
, S
->getI32Imm(1, dl
), S
->getI32Imm(31, dl
),
2848 S
->getI32Imm(31, dl
) };
2849 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2853 if (CmpInGPR
== ICGPR_NonExtIn
)
2855 // The upper 32-bits of the register can't be undefined for this sequence.
2856 LHS
= signExtendInputIfNeeded(LHS
);
2857 RHS
= signExtendInputIfNeeded(RHS
);
2859 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
2860 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2861 SUBFNode
, S
->getI64Imm(1, dl
),
2862 S
->getI64Imm(63, dl
)), 0);
2865 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
2866 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
2867 std::swap(LHS
, RHS
);
2870 if (CmpInGPR
== ICGPR_NonExtIn
)
2872 // The upper 32-bits of the register can't be undefined for this sequence.
2873 LHS
= zeroExtendInputIfNeeded(LHS
);
2874 RHS
= zeroExtendInputIfNeeded(RHS
);
2876 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
2878 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2879 Subtract
, S
->getI64Imm(1, dl
),
2880 S
->getI64Imm(63, dl
)), 0);
2881 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, SrdiNode
,
2882 S
->getI32Imm(1, dl
)), 0);
2885 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
2886 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
2887 std::swap(LHS
, RHS
);
2890 if (CmpInGPR
== ICGPR_NonExtIn
)
2892 // The upper 32-bits of the register can't be undefined for this sequence.
2893 LHS
= zeroExtendInputIfNeeded(LHS
);
2894 RHS
= zeroExtendInputIfNeeded(RHS
);
2896 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
2897 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2898 Subtract
, S
->getI64Imm(1, dl
),
2899 S
->getI64Imm(63, dl
)), 0);
2904 /// Produces a sign-extended result of comparing two 32-bit values according to
2905 /// the passed condition code.
2907 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS
, SDValue RHS
,
2909 int64_t RHSValue
, SDLoc dl
) {
2910 if (CmpInGPR
== ICGPR_I64
|| CmpInGPR
== ICGPR_SextI64
||
2911 CmpInGPR
== ICGPR_ZextI64
|| CmpInGPR
== ICGPR_Zext
)
2913 bool IsRHSZero
= RHSValue
== 0;
2914 bool IsRHSOne
= RHSValue
== 1;
2915 bool IsRHSNegOne
= RHSValue
== -1LL;
2918 default: return SDValue();
2920 // (sext (setcc %a, %b, seteq)) ->
2921 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
2922 // (sext (setcc %a, 0, seteq)) ->
2923 // (ashr (shl (ctlz %a), 58), 63)
2924 SDValue CountInput
= IsRHSZero
? LHS
:
2925 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2927 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, CountInput
), 0);
2928 SDValue SHLOps
[] = { Cntlzw
, S
->getI32Imm(27, dl
),
2929 S
->getI32Imm(5, dl
), S
->getI32Imm(31, dl
) };
2931 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, SHLOps
), 0);
2932 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Slwi
), 0);
2935 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
2936 // flip the bit, finally take 2's complement.
2937 // (sext (setcc %a, %b, setne)) ->
2938 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
2939 // Same as above, but the first xor is not needed.
2940 // (sext (setcc %a, 0, setne)) ->
2941 // (neg (xor (lshr (ctlz %a), 5), 1))
2942 SDValue Xor
= IsRHSZero
? LHS
:
2943 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2945 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2946 SDValue ShiftOps
[] =
2947 { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
), S
->getI32Imm(31, dl
) };
2949 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
2951 SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
2952 S
->getI32Imm(1, dl
)), 0);
2953 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Xori
), 0);
2956 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
2957 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
2959 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
2961 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2962 // by swapping inputs and falling through.
2963 std::swap(LHS
, RHS
);
2964 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2965 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2969 if (CmpInGPR
== ICGPR_NonExtIn
)
2971 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
2972 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
2974 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
2976 // The upper 32-bits of the register can't be undefined for this sequence.
2977 LHS
= signExtendInputIfNeeded(LHS
);
2978 RHS
= signExtendInputIfNeeded(RHS
);
2980 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, MVT::Glue
,
2983 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2984 SUBFNode
, S
->getI64Imm(1, dl
),
2985 S
->getI64Imm(63, dl
)), 0);
2986 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Srdi
,
2987 S
->getI32Imm(-1, dl
)), 0);
2990 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
2991 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
2992 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
2994 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
2996 if (CmpInGPR
== ICGPR_NonExtIn
)
2998 // The upper 32-bits of the register can't be undefined for this sequence.
2999 LHS
= signExtendInputIfNeeded(LHS
);
3000 RHS
= signExtendInputIfNeeded(RHS
);
3002 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
3003 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, Neg
,
3004 S
->getI64Imm(63, dl
)), 0);
3006 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3007 // (%b < %a) by swapping inputs and falling through.
3008 std::swap(LHS
, RHS
);
3009 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3010 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3011 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3015 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3016 // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3017 // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3019 if (CmpInGPR
== ICGPR_NonExtIn
)
3021 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3024 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, LHS
,
3025 S
->getI32Imm(31, dl
)), 0);
3027 if (CmpInGPR
== ICGPR_NonExtIn
)
3029 // The upper 32-bits of the register can't be undefined for this sequence.
3030 LHS
= signExtendInputIfNeeded(LHS
);
3031 RHS
= signExtendInputIfNeeded(RHS
);
3033 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3034 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3035 SUBFNode
, S
->getI64Imm(63, dl
)), 0);
3038 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3039 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3040 std::swap(LHS
, RHS
);
3043 if (CmpInGPR
== ICGPR_NonExtIn
)
3045 // The upper 32-bits of the register can't be undefined for this sequence.
3046 LHS
= zeroExtendInputIfNeeded(LHS
);
3047 RHS
= zeroExtendInputIfNeeded(RHS
);
3049 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3051 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Subtract
,
3052 S
->getI32Imm(1, dl
), S
->getI32Imm(63,dl
)),
3054 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Shift
,
3055 S
->getI32Imm(-1, dl
)), 0);
3058 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3059 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3060 std::swap(LHS
, RHS
);
3063 if (CmpInGPR
== ICGPR_NonExtIn
)
3065 // The upper 32-bits of the register can't be undefined for this sequence.
3066 LHS
= zeroExtendInputIfNeeded(LHS
);
3067 RHS
= zeroExtendInputIfNeeded(RHS
);
3069 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3070 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3071 Subtract
, S
->getI64Imm(63, dl
)), 0);
3076 /// Produces a zero-extended result of comparing two 64-bit values according to
3077 /// the passed condition code.
3079 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS
, SDValue RHS
,
3081 int64_t RHSValue
, SDLoc dl
) {
3082 if (CmpInGPR
== ICGPR_I32
|| CmpInGPR
== ICGPR_SextI32
||
3083 CmpInGPR
== ICGPR_ZextI32
|| CmpInGPR
== ICGPR_Sext
)
3085 bool IsRHSZero
= RHSValue
== 0;
3086 bool IsRHSOne
= RHSValue
== 1;
3087 bool IsRHSNegOne
= RHSValue
== -1LL;
3089 default: return SDValue();
3091 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3092 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3093 SDValue Xor
= IsRHSZero
? LHS
:
3094 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3096 SDValue(CurDAG
->getMachineNode(PPC::CNTLZD
, dl
, MVT::i64
, Xor
), 0);
3097 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Clz
,
3098 S
->getI64Imm(58, dl
),
3099 S
->getI64Imm(63, dl
)), 0);
3102 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3103 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3104 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3105 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3106 SDValue Xor
= IsRHSZero
? LHS
:
3107 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3109 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3110 Xor
, S
->getI32Imm(~0U, dl
)), 0);
3111 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, AC
,
3112 Xor
, AC
.getValue(1)), 0);
3115 // {subc.reg, subc.CA} = (subcarry %a, %b)
3116 // (zext (setcc %a, %b, setge)) ->
3117 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3118 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3120 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3121 std::swap(LHS
, RHS
);
3122 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3123 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3127 // {subc.reg, subc.CA} = (subcarry %b, %a)
3128 // (zext (setcc %a, %b, setge)) ->
3129 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3130 // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3132 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3134 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3135 S
->getI64Imm(1, dl
),
3136 S
->getI64Imm(63, dl
)), 0);
3138 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, RHS
,
3139 S
->getI64Imm(63, dl
)), 0);
3140 SDValue SubtractCarry
=
3141 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3143 return SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3144 ShiftR
, ShiftL
, SubtractCarry
), 0);
3147 // {subc.reg, subc.CA} = (subcarry %b, %a)
3148 // (zext (setcc %a, %b, setgt)) ->
3149 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3150 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3152 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3155 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3156 S
->getI64Imm(~0ULL, dl
)), 0);
3158 SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
, Addi
, LHS
), 0);
3159 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Nor
,
3160 S
->getI64Imm(1, dl
),
3161 S
->getI64Imm(63, dl
)), 0);
3163 std::swap(LHS
, RHS
);
3164 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3165 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3166 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3170 // {subc.reg, subc.CA} = (subcarry %a, %b)
3171 // (zext (setcc %a, %b, setlt)) ->
3172 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3173 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3175 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3177 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3178 S
->getI64Imm(1, dl
),
3179 S
->getI64Imm(63, dl
)), 0);
3181 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3182 LHS
, S
->getI64Imm(63, dl
)), 0);
3184 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3185 RHS
, S
->getI64Imm(1, dl
),
3186 S
->getI64Imm(63, dl
)), 0);
3187 SDValue SUBFC8Carry
=
3188 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3191 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3192 SRDINode
, SRADINode
, SUBFC8Carry
), 0);
3193 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3194 ADDE8Node
, S
->getI64Imm(1, dl
)), 0);
3197 // {subc.reg, subc.CA} = (subcarry %a, %b)
3198 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3199 std::swap(LHS
, RHS
);
3202 // {subc.reg, subc.CA} = (subcarry %b, %a)
3203 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3204 SDValue SUBFC8Carry
=
3205 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3207 SDValue SUBFE8Node
=
3208 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, MVT::Glue
,
3209 LHS
, LHS
, SUBFC8Carry
), 0);
3210 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
,
3211 SUBFE8Node
, S
->getI64Imm(1, dl
)), 0);
3214 // {subc.reg, subc.CA} = (subcarry %b, %a)
3215 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3216 std::swap(LHS
, RHS
);
3219 // {subc.reg, subc.CA} = (subcarry %a, %b)
3220 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3221 SDValue SubtractCarry
=
3222 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3225 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
,
3226 LHS
, LHS
, SubtractCarry
), 0);
3227 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
,
3233 /// Produces a sign-extended result of comparing two 64-bit values according to
3234 /// the passed condition code.
3236 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS
, SDValue RHS
,
3238 int64_t RHSValue
, SDLoc dl
) {
3239 if (CmpInGPR
== ICGPR_I32
|| CmpInGPR
== ICGPR_SextI32
||
3240 CmpInGPR
== ICGPR_ZextI32
|| CmpInGPR
== ICGPR_Zext
)
3242 bool IsRHSZero
= RHSValue
== 0;
3243 bool IsRHSOne
= RHSValue
== 1;
3244 bool IsRHSNegOne
= RHSValue
== -1LL;
3246 default: return SDValue();
3248 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3249 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3250 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3251 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3252 SDValue AddInput
= IsRHSZero
? LHS
:
3253 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3255 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3256 AddInput
, S
->getI32Imm(~0U, dl
)), 0);
3257 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, Addic
,
3258 Addic
, Addic
.getValue(1)), 0);
3261 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3262 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3263 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3264 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3265 SDValue Xor
= IsRHSZero
? LHS
:
3266 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3268 SDValue(CurDAG
->getMachineNode(PPC::SUBFIC8
, dl
, MVT::i64
, MVT::Glue
,
3269 Xor
, S
->getI32Imm(0, dl
)), 0);
3270 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, SC
,
3271 SC
, SC
.getValue(1)), 0);
3274 // {subc.reg, subc.CA} = (subcarry %a, %b)
3275 // (zext (setcc %a, %b, setge)) ->
3276 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3277 // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3279 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3280 std::swap(LHS
, RHS
);
3281 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3282 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3286 // {subc.reg, subc.CA} = (subcarry %b, %a)
3287 // (zext (setcc %a, %b, setge)) ->
3288 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3289 // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3291 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3293 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, RHS
,
3294 S
->getI64Imm(63, dl
)), 0);
3296 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3297 S
->getI64Imm(1, dl
),
3298 S
->getI64Imm(63, dl
)), 0);
3299 SDValue SubtractCarry
=
3300 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3303 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3304 ShiftR
, ShiftL
, SubtractCarry
), 0);
3305 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, Adde
), 0);
3308 // {subc.reg, subc.CA} = (subcarry %b, %a)
3309 // (zext (setcc %a, %b, setgt)) ->
3310 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3311 // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3313 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3316 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3317 S
->getI64Imm(-1, dl
)), 0);
3319 SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
, Add
, LHS
), 0);
3320 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, Nor
,
3321 S
->getI64Imm(63, dl
)), 0);
3323 std::swap(LHS
, RHS
);
3324 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3325 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3326 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3330 // {subc.reg, subc.CA} = (subcarry %a, %b)
3331 // (zext (setcc %a, %b, setlt)) ->
3332 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3333 // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3335 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3337 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, LHS
,
3338 S
->getI64Imm(63, dl
)), 0);
3341 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3342 LHS
, S
->getI64Imm(63, dl
)), 0);
3344 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3345 RHS
, S
->getI64Imm(1, dl
),
3346 S
->getI64Imm(63, dl
)), 0);
3347 SDValue SUBFC8Carry
=
3348 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3351 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
,
3352 SRDINode
, SRADINode
, SUBFC8Carry
), 0);
3354 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3355 ADDE8Node
, S
->getI64Imm(1, dl
)), 0);
3356 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
,
3360 // {subc.reg, subc.CA} = (subcarry %a, %b)
3361 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3362 std::swap(LHS
, RHS
);
3365 // {subc.reg, subc.CA} = (subcarry %b, %a)
3366 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3367 SDValue SubtractCarry
=
3368 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3371 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, MVT::Glue
, LHS
,
3372 LHS
, SubtractCarry
), 0);
3373 return SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
,
3374 ExtSub
, ExtSub
), 0);
3377 // {subc.reg, subc.CA} = (subcarry %b, %a)
3378 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3379 std::swap(LHS
, RHS
);
3382 // {subc.reg, subc.CA} = (subcarry %a, %b)
3383 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3385 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3387 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
,
3388 LHS
, LHS
, SubCarry
), 0);
3393 /// Do all uses of this SDValue need the result in a GPR?
3394 /// This is meant to be used on values that have type i1 since
3395 /// it is somewhat meaningless to ask if values of other types
3396 /// should be kept in GPR's.
3397 static bool allUsesExtend(SDValue Compare
, SelectionDAG
*CurDAG
) {
3398 assert(Compare
.getOpcode() == ISD::SETCC
&&
3399 "An ISD::SETCC node required here.");
3401 // For values that have a single use, the caller should obviously already have
3402 // checked if that use is an extending use. We check the other uses here.
3403 if (Compare
.hasOneUse())
3405 // We want the value in a GPR if it is being extended, used for a select, or
3406 // used in logical operations.
3407 for (auto CompareUse
: Compare
.getNode()->uses())
3408 if (CompareUse
->getOpcode() != ISD::SIGN_EXTEND
&&
3409 CompareUse
->getOpcode() != ISD::ZERO_EXTEND
&&
3410 CompareUse
->getOpcode() != ISD::SELECT
&&
3411 !isLogicOp(CompareUse
->getOpcode())) {
3412 OmittedForNonExtendUses
++;
3418 /// Returns an equivalent of a SETCC node but with the result the same width as
3419 /// the inputs. This can also be used for SELECT_CC if either the true or false
3420 /// values is a power of two while the other is zero.
3421 SDValue
IntegerCompareEliminator::getSETCCInGPR(SDValue Compare
,
3422 SetccInGPROpts ConvOpts
) {
3423 assert((Compare
.getOpcode() == ISD::SETCC
||
3424 Compare
.getOpcode() == ISD::SELECT_CC
) &&
3425 "An ISD::SETCC node required here.");
3427 // Don't convert this comparison to a GPR sequence because there are uses
3428 // of the i1 result (i.e. uses that require the result in the CR).
3429 if ((Compare
.getOpcode() == ISD::SETCC
) && !allUsesExtend(Compare
, CurDAG
))
3432 SDValue LHS
= Compare
.getOperand(0);
3433 SDValue RHS
= Compare
.getOperand(1);
3435 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3436 int CCOpNum
= Compare
.getOpcode() == ISD::SELECT_CC
? 4 : 2;
3438 cast
<CondCodeSDNode
>(Compare
.getOperand(CCOpNum
))->get();
3439 EVT InputVT
= LHS
.getValueType();
3440 if (InputVT
!= MVT::i32
&& InputVT
!= MVT::i64
)
3443 if (ConvOpts
== SetccInGPROpts::ZExtInvert
||
3444 ConvOpts
== SetccInGPROpts::SExtInvert
)
3445 CC
= ISD::getSetCCInverse(CC
, true);
3447 bool Inputs32Bit
= InputVT
== MVT::i32
;
3450 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3451 int64_t RHSValue
= RHSConst
? RHSConst
->getSExtValue() : INT64_MAX
;
3452 bool IsSext
= ConvOpts
== SetccInGPROpts::SExtOrig
||
3453 ConvOpts
== SetccInGPROpts::SExtInvert
;
3455 if (IsSext
&& Inputs32Bit
)
3456 return get32BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3457 else if (Inputs32Bit
)
3458 return get32BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3460 return get64BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3461 return get64BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3464 } // end anonymous namespace
3466 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode
*N
) {
3467 if (N
->getValueType(0) != MVT::i32
&&
3468 N
->getValueType(0) != MVT::i64
)
3471 // This optimization will emit code that assumes 64-bit registers
3472 // so we don't want to run it in 32-bit mode. Also don't run it
3473 // on functions that are not to be optimized.
3474 if (TM
.getOptLevel() == CodeGenOpt::None
|| !TM
.isPPC64())
3477 switch (N
->getOpcode()) {
3479 case ISD::ZERO_EXTEND
:
3480 case ISD::SIGN_EXTEND
:
3484 IntegerCompareEliminator
ICmpElim(CurDAG
, this);
3485 if (SDNode
*New
= ICmpElim
.Select(N
)) {
3486 ReplaceNode(N
, New
);
3494 bool PPCDAGToDAGISel::tryBitPermutation(SDNode
*N
) {
3495 if (N
->getValueType(0) != MVT::i32
&&
3496 N
->getValueType(0) != MVT::i64
)
3499 if (!UseBitPermRewriter
)
3502 switch (N
->getOpcode()) {
3509 BitPermutationSelector
BPS(CurDAG
);
3510 if (SDNode
*New
= BPS
.Select(N
)) {
3511 ReplaceNode(N
, New
);
3521 /// SelectCC - Select a comparison of the specified values with the specified
3522 /// condition code, returning the CR# of the expression.
3523 SDValue
PPCDAGToDAGISel::SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
3525 // Always select the LHS.
3528 if (LHS
.getValueType() == MVT::i32
) {
3530 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
3531 if (isInt32Immediate(RHS
, Imm
)) {
3532 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3533 if (isUInt
<16>(Imm
))
3534 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
3535 getI32Imm(Imm
& 0xFFFF, dl
)),
3537 // If this is a 16-bit signed immediate, fold it.
3538 if (isInt
<16>((int)Imm
))
3539 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
3540 getI32Imm(Imm
& 0xFFFF, dl
)),
3543 // For non-equality comparisons, the default code would materialize the
3544 // constant, then compare against it, like this:
3546 // ori r2, r2, 22136
3548 // Since we are just comparing for equality, we can emit this instead:
3549 // xoris r0,r3,0x1234
3550 // cmplwi cr0,r0,0x5678
3552 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS
, dl
, MVT::i32
, LHS
,
3553 getI32Imm(Imm
>> 16, dl
)), 0);
3554 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, Xor
,
3555 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
3558 } else if (ISD::isUnsignedIntSetCC(CC
)) {
3559 if (isInt32Immediate(RHS
, Imm
) && isUInt
<16>(Imm
))
3560 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
3561 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
3565 if (isIntS16Immediate(RHS
, SImm
))
3566 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
3567 getI32Imm((int)SImm
& 0xFFFF,
3572 } else if (LHS
.getValueType() == MVT::i64
) {
3574 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
3575 if (isInt64Immediate(RHS
.getNode(), Imm
)) {
3576 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3577 if (isUInt
<16>(Imm
))
3578 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
3579 getI32Imm(Imm
& 0xFFFF, dl
)),
3581 // If this is a 16-bit signed immediate, fold it.
3583 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
3584 getI32Imm(Imm
& 0xFFFF, dl
)),
3587 // For non-equality comparisons, the default code would materialize the
3588 // constant, then compare against it, like this:
3590 // ori r2, r2, 22136
3592 // Since we are just comparing for equality, we can emit this instead:
3593 // xoris r0,r3,0x1234
3594 // cmpldi cr0,r0,0x5678
3596 if (isUInt
<32>(Imm
)) {
3597 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS8
, dl
, MVT::i64
, LHS
,
3598 getI64Imm(Imm
>> 16, dl
)), 0);
3599 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, Xor
,
3600 getI64Imm(Imm
& 0xFFFF, dl
)),
3605 } else if (ISD::isUnsignedIntSetCC(CC
)) {
3606 if (isInt64Immediate(RHS
.getNode(), Imm
) && isUInt
<16>(Imm
))
3607 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
3608 getI64Imm(Imm
& 0xFFFF, dl
)), 0);
3612 if (isIntS16Immediate(RHS
, SImm
))
3613 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
3614 getI64Imm(SImm
& 0xFFFF, dl
)),
3618 } else if (LHS
.getValueType() == MVT::f32
) {
3619 if (PPCSubTarget
->hasSPE()) {
3624 Opc
= PPC::EFSCMPEQ
;
3632 Opc
= PPC::EFSCMPLT
;
3640 Opc
= PPC::EFSCMPGT
;
3645 } else if (LHS
.getValueType() == MVT::f64
) {
3646 if (PPCSubTarget
->hasSPE()) {
3651 Opc
= PPC::EFDCMPEQ
;
3659 Opc
= PPC::EFDCMPLT
;
3667 Opc
= PPC::EFDCMPGT
;
3671 Opc
= PPCSubTarget
->hasVSX() ? PPC::XSCMPUDP
: PPC::FCMPUD
;
3673 assert(LHS
.getValueType() == MVT::f128
&& "Unknown vt!");
3674 assert(PPCSubTarget
->hasVSX() && "__float128 requires VSX");
3675 Opc
= PPC::XSCMPUQP
;
3677 return SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, LHS
, RHS
), 0);
3680 static PPC::Predicate
getPredicateForSetCC(ISD::CondCode CC
) {
3686 llvm_unreachable("Should be lowered by legalize!");
3687 default: llvm_unreachable("Unknown condition!");
3689 case ISD::SETEQ
: return PPC::PRED_EQ
;
3691 case ISD::SETNE
: return PPC::PRED_NE
;
3693 case ISD::SETLT
: return PPC::PRED_LT
;
3695 case ISD::SETLE
: return PPC::PRED_LE
;
3697 case ISD::SETGT
: return PPC::PRED_GT
;
3699 case ISD::SETGE
: return PPC::PRED_GE
;
3700 case ISD::SETO
: return PPC::PRED_NU
;
3701 case ISD::SETUO
: return PPC::PRED_UN
;
3702 // These two are invalid for floating point. Assume we have int.
3703 case ISD::SETULT
: return PPC::PRED_LT
;
3704 case ISD::SETUGT
: return PPC::PRED_GT
;
3708 /// getCRIdxForSetCC - Return the index of the condition register field
3709 /// associated with the SetCC condition, and whether or not the field is
3710 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
3711 static unsigned getCRIdxForSetCC(ISD::CondCode CC
, bool &Invert
) {
3714 default: llvm_unreachable("Unknown condition!");
3716 case ISD::SETLT
: return 0; // Bit #0 = SETOLT
3718 case ISD::SETGT
: return 1; // Bit #1 = SETOGT
3720 case ISD::SETEQ
: return 2; // Bit #2 = SETOEQ
3721 case ISD::SETUO
: return 3; // Bit #3 = SETUO
3723 case ISD::SETGE
: Invert
= true; return 0; // !Bit #0 = SETUGE
3725 case ISD::SETLE
: Invert
= true; return 1; // !Bit #1 = SETULE
3727 case ISD::SETNE
: Invert
= true; return 2; // !Bit #2 = SETUNE
3728 case ISD::SETO
: Invert
= true; return 3; // !Bit #3 = SETO
3733 llvm_unreachable("Invalid branch code: should be expanded by legalize");
3734 // These are invalid for floating point. Assume integer.
3735 case ISD::SETULT
: return 0;
3736 case ISD::SETUGT
: return 1;
3740 // getVCmpInst: return the vector compare instruction for the specified
3741 // vector type and condition code. Since this is for altivec specific code,
3742 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
3743 static unsigned int getVCmpInst(MVT VecVT
, ISD::CondCode CC
,
3744 bool HasVSX
, bool &Swap
, bool &Negate
) {
3748 if (VecVT
.isFloatingPoint()) {
3749 /* Handle some cases by swapping input operands. */
3751 case ISD::SETLE
: CC
= ISD::SETGE
; Swap
= true; break;
3752 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
3753 case ISD::SETOLE
: CC
= ISD::SETOGE
; Swap
= true; break;
3754 case ISD::SETOLT
: CC
= ISD::SETOGT
; Swap
= true; break;
3755 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
3756 case ISD::SETUGT
: CC
= ISD::SETULT
; Swap
= true; break;
3759 /* Handle some cases by negating the result. */
3761 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
3762 case ISD::SETUNE
: CC
= ISD::SETOEQ
; Negate
= true; break;
3763 case ISD::SETULE
: CC
= ISD::SETOGT
; Negate
= true; break;
3764 case ISD::SETULT
: CC
= ISD::SETOGE
; Negate
= true; break;
3767 /* We have instructions implementing the remaining cases. */
3771 if (VecVT
== MVT::v4f32
)
3772 return HasVSX
? PPC::XVCMPEQSP
: PPC::VCMPEQFP
;
3773 else if (VecVT
== MVT::v2f64
)
3774 return PPC::XVCMPEQDP
;
3778 if (VecVT
== MVT::v4f32
)
3779 return HasVSX
? PPC::XVCMPGTSP
: PPC::VCMPGTFP
;
3780 else if (VecVT
== MVT::v2f64
)
3781 return PPC::XVCMPGTDP
;
3785 if (VecVT
== MVT::v4f32
)
3786 return HasVSX
? PPC::XVCMPGESP
: PPC::VCMPGEFP
;
3787 else if (VecVT
== MVT::v2f64
)
3788 return PPC::XVCMPGEDP
;
3793 llvm_unreachable("Invalid floating-point vector compare condition");
3795 /* Handle some cases by swapping input operands. */
3797 case ISD::SETGE
: CC
= ISD::SETLE
; Swap
= true; break;
3798 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
3799 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
3800 case ISD::SETULT
: CC
= ISD::SETUGT
; Swap
= true; break;
3803 /* Handle some cases by negating the result. */
3805 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
3806 case ISD::SETUNE
: CC
= ISD::SETUEQ
; Negate
= true; break;
3807 case ISD::SETLE
: CC
= ISD::SETGT
; Negate
= true; break;
3808 case ISD::SETULE
: CC
= ISD::SETUGT
; Negate
= true; break;
3811 /* We have instructions implementing the remaining cases. */
3815 if (VecVT
== MVT::v16i8
)
3816 return PPC::VCMPEQUB
;
3817 else if (VecVT
== MVT::v8i16
)
3818 return PPC::VCMPEQUH
;
3819 else if (VecVT
== MVT::v4i32
)
3820 return PPC::VCMPEQUW
;
3821 else if (VecVT
== MVT::v2i64
)
3822 return PPC::VCMPEQUD
;
3825 if (VecVT
== MVT::v16i8
)
3826 return PPC::VCMPGTSB
;
3827 else if (VecVT
== MVT::v8i16
)
3828 return PPC::VCMPGTSH
;
3829 else if (VecVT
== MVT::v4i32
)
3830 return PPC::VCMPGTSW
;
3831 else if (VecVT
== MVT::v2i64
)
3832 return PPC::VCMPGTSD
;
3835 if (VecVT
== MVT::v16i8
)
3836 return PPC::VCMPGTUB
;
3837 else if (VecVT
== MVT::v8i16
)
3838 return PPC::VCMPGTUH
;
3839 else if (VecVT
== MVT::v4i32
)
3840 return PPC::VCMPGTUW
;
3841 else if (VecVT
== MVT::v2i64
)
3842 return PPC::VCMPGTUD
;
3847 llvm_unreachable("Invalid integer vector compare condition");
3851 bool PPCDAGToDAGISel::trySETCC(SDNode
*N
) {
3854 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
3856 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
3857 bool isPPC64
= (PtrVT
== MVT::i64
);
3859 if (!PPCSubTarget
->useCRBits() &&
3860 isInt32Immediate(N
->getOperand(1), Imm
)) {
3861 // We can codegen setcc op, imm very efficiently compared to a brcond.
3862 // Check for those cases here.
3865 SDValue Op
= N
->getOperand(0);
3869 Op
= SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Op
), 0);
3870 SDValue Ops
[] = { Op
, getI32Imm(27, dl
), getI32Imm(5, dl
),
3871 getI32Imm(31, dl
) };
3872 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
3878 SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
3879 Op
, getI32Imm(~0U, dl
)), 0);
3880 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, AD
, Op
, AD
.getValue(1));
3884 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
3885 getI32Imm(31, dl
) };
3886 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
3891 SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Op
), 0);
3892 T
= SDValue(CurDAG
->getMachineNode(PPC::ANDC
, dl
, MVT::i32
, T
, Op
), 0);
3893 SDValue Ops
[] = { T
, getI32Imm(1, dl
), getI32Imm(31, dl
),
3894 getI32Imm(31, dl
) };
3895 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
3899 } else if (Imm
== ~0U) { // setcc op, -1
3900 SDValue Op
= N
->getOperand(0);
3905 Op
= SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
3906 Op
, getI32Imm(1, dl
)), 0);
3907 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
,
3908 SDValue(CurDAG
->getMachineNode(PPC::LI
, dl
,
3911 0), Op
.getValue(1));
3915 Op
= SDValue(CurDAG
->getMachineNode(PPC::NOR
, dl
, MVT::i32
, Op
, Op
), 0);
3916 SDNode
*AD
= CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
3917 Op
, getI32Imm(~0U, dl
));
3918 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(AD
, 0), Op
,
3923 SDValue AD
= SDValue(CurDAG
->getMachineNode(PPC::ADDI
, dl
, MVT::i32
, Op
,
3924 getI32Imm(1, dl
)), 0);
3925 SDValue AN
= SDValue(CurDAG
->getMachineNode(PPC::AND
, dl
, MVT::i32
, AD
,
3927 SDValue Ops
[] = { AN
, getI32Imm(1, dl
), getI32Imm(31, dl
),
3928 getI32Imm(31, dl
) };
3929 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
3933 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
3934 getI32Imm(31, dl
) };
3935 Op
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
3936 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Op
, getI32Imm(1, dl
));
3943 SDValue LHS
= N
->getOperand(0);
3944 SDValue RHS
= N
->getOperand(1);
3946 // Altivec Vector compare instructions do not set any CR register by default and
3947 // vector compare operations return the same type as the operands.
3948 if (LHS
.getValueType().isVector()) {
3949 if (PPCSubTarget
->hasQPX() || PPCSubTarget
->hasSPE())
3952 EVT VecVT
= LHS
.getValueType();
3954 unsigned int VCmpInst
= getVCmpInst(VecVT
.getSimpleVT(), CC
,
3955 PPCSubTarget
->hasVSX(), Swap
, Negate
);
3957 std::swap(LHS
, RHS
);
3959 EVT ResVT
= VecVT
.changeVectorElementTypeToInteger();
3961 SDValue
VCmp(CurDAG
->getMachineNode(VCmpInst
, dl
, ResVT
, LHS
, RHS
), 0);
3962 CurDAG
->SelectNodeTo(N
, PPCSubTarget
->hasVSX() ? PPC::XXLNOR
: PPC::VNOR
,
3967 CurDAG
->SelectNodeTo(N
, VCmpInst
, ResVT
, LHS
, RHS
);
3971 if (PPCSubTarget
->useCRBits())
3975 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
3976 SDValue CCReg
= SelectCC(LHS
, RHS
, CC
, dl
);
3979 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
3980 // The correct compare instruction is already set by SelectCC()
3981 if (PPCSubTarget
->hasSPE() && LHS
.getValueType().isFloatingPoint()) {
3985 // Force the ccreg into CR7.
3986 SDValue CR7Reg
= CurDAG
->getRegister(PPC::CR7
, MVT::i32
);
3988 SDValue
InFlag(nullptr, 0); // Null incoming flag value.
3989 CCReg
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, CR7Reg
, CCReg
,
3990 InFlag
).getValue(1);
3992 IntCR
= SDValue(CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
, CR7Reg
,
3995 SDValue Ops
[] = { IntCR
, getI32Imm((32 - (3 - Idx
)) & 31, dl
),
3996 getI32Imm(31, dl
), getI32Imm(31, dl
) };
3998 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4002 // Get the specified bit.
4004 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
4005 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Tmp
, getI32Imm(1, dl
));
4009 /// Does this node represent a load/store node whose address can be represented
4010 /// with a register plus an immediate that's a multiple of \p Val:
4011 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const {
4012 LoadSDNode
*LDN
= dyn_cast
<LoadSDNode
>(N
);
4013 StoreSDNode
*STN
= dyn_cast
<StoreSDNode
>(N
);
4016 AddrOp
= LDN
->getOperand(1);
4018 AddrOp
= STN
->getOperand(2);
4020 // If the address points a frame object or a frame object with an offset,
4021 // we need to check the object alignment.
4023 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(
4024 AddrOp
.getOpcode() == ISD::ADD
? AddrOp
.getOperand(0) :
4026 // If op0 is a frame index that is under aligned, we can't do it either,
4027 // because it is translated to r31 or r1 + slot + offset. We won't know the
4028 // slot number until the stack frame is finalized.
4029 const MachineFrameInfo
&MFI
= CurDAG
->getMachineFunction().getFrameInfo();
4030 unsigned SlotAlign
= MFI
.getObjectAlignment(FI
->getIndex());
4031 if ((SlotAlign
% Val
) != 0)
4034 // If we have an offset, we need further check on the offset.
4035 if (AddrOp
.getOpcode() != ISD::ADD
)
4039 if (AddrOp
.getOpcode() == ISD::ADD
)
4040 return isIntS16Immediate(AddrOp
.getOperand(1), Imm
) && !(Imm
% Val
);
4042 // If the address comes from the outside, the offset will be zero.
4043 return AddrOp
.getOpcode() == ISD::CopyFromReg
;
4046 void PPCDAGToDAGISel::transferMemOperands(SDNode
*N
, SDNode
*Result
) {
4047 // Transfer memoperands.
4048 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
4049 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
4052 /// This method returns a node after flipping the MSB of each element
4053 /// of vector integer type. Additionally, if SignBitVec is non-null,
4054 /// this method sets a node with one at MSB of all elements
4055 /// and zero at other bits in SignBitVec.
4057 PPCDAGToDAGISel::flipSignBit(const SDValue
&N
, SDNode
**SignBitVec
) {
4059 EVT VecVT
= N
.getValueType();
4060 if (VecVT
== MVT::v4i32
) {
4062 SDNode
*ZV
= CurDAG
->getMachineNode(PPC::V_SET0
, dl
, MVT::v4i32
);
4063 *SignBitVec
= CurDAG
->getMachineNode(PPC::XVNEGSP
, dl
, VecVT
,
4066 return CurDAG
->getMachineNode(PPC::XVNEGSP
, dl
, VecVT
, N
);
4068 else if (VecVT
== MVT::v8i16
) {
4069 SDNode
*Hi
= CurDAG
->getMachineNode(PPC::LIS
, dl
, MVT::i32
,
4070 getI32Imm(0x8000, dl
));
4071 SDNode
*ScaImm
= CurDAG
->getMachineNode(PPC::ORI
, dl
, MVT::i32
,
4073 getI32Imm(0x8000, dl
));
4074 SDNode
*VecImm
= CurDAG
->getMachineNode(PPC::MTVSRWS
, dl
, VecVT
,
4075 SDValue(ScaImm
, 0));
4077 Alternatively, we can do this as follow to use VRF instead of GPR.
4082 if (SignBitVec
) *SignBitVec
= VecImm
;
4083 return CurDAG
->getMachineNode(PPC::VADDUHM
, dl
, VecVT
, N
,
4084 SDValue(VecImm
, 0));
4086 else if (VecVT
== MVT::v16i8
) {
4087 SDNode
*VecImm
= CurDAG
->getMachineNode(PPC::XXSPLTIB
, dl
, MVT::i32
,
4088 getI32Imm(0x80, dl
));
4089 if (SignBitVec
) *SignBitVec
= VecImm
;
4090 return CurDAG
->getMachineNode(PPC::VADDUBM
, dl
, VecVT
, N
,
4091 SDValue(VecImm
, 0));
4094 llvm_unreachable("Unsupported vector data type for flipSignBit");
4097 // Select - Convert the specified operand from a target-independent to a
4098 // target-specific node if it hasn't already been changed.
4099 void PPCDAGToDAGISel::Select(SDNode
*N
) {
4101 if (N
->isMachineOpcode()) {
4103 return; // Already selected.
4106 // In case any misguided DAG-level optimizations form an ADD with a
4107 // TargetConstant operand, crash here instead of miscompiling (by selecting
4108 // an r+r add instead of some kind of r+i add).
4109 if (N
->getOpcode() == ISD::ADD
&&
4110 N
->getOperand(1).getOpcode() == ISD::TargetConstant
)
4111 llvm_unreachable("Invalid ADD with TargetConstant operand");
4113 // Try matching complex bit permutations before doing anything else.
4114 if (tryBitPermutation(N
))
4117 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4118 if (tryIntCompareInGPR(N
))
4121 switch (N
->getOpcode()) {
4125 if (N
->getValueType(0) == MVT::i64
) {
4126 ReplaceNode(N
, selectI64Imm(CurDAG
, N
));
4136 case PPCISD::CALL
: {
4137 const Module
*M
= MF
->getFunction().getParent();
4139 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) != MVT::i32
||
4140 !PPCSubTarget
->isSecurePlt() || !PPCSubTarget
->isTargetELF() ||
4141 M
->getPICLevel() == PICLevel::SmallPIC
)
4144 SDValue Op
= N
->getOperand(1);
4146 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Op
)) {
4147 if (GA
->getTargetFlags() == PPCII::MO_PLT
)
4150 else if (ExternalSymbolSDNode
*ES
= dyn_cast
<ExternalSymbolSDNode
>(Op
)) {
4151 if (ES
->getTargetFlags() == PPCII::MO_PLT
)
4157 case PPCISD::GlobalBaseReg
:
4158 ReplaceNode(N
, getGlobalBaseReg());
4161 case ISD::FrameIndex
:
4162 selectFrameIndex(N
, N
);
4165 case PPCISD::MFOCRF
: {
4166 SDValue InFlag
= N
->getOperand(1);
4167 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
,
4168 N
->getOperand(0), InFlag
));
4172 case PPCISD::READ_TIME_BASE
:
4173 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ReadTB
, dl
, MVT::i32
, MVT::i32
,
4174 MVT::Other
, N
->getOperand(0)));
4177 case PPCISD::SRA_ADDZE
: {
4178 SDValue N0
= N
->getOperand(0);
4180 CurDAG
->getTargetConstant(*cast
<ConstantSDNode
>(N
->getOperand(1))->
4181 getConstantIntValue(), dl
,
4182 N
->getValueType(0));
4183 if (N
->getValueType(0) == MVT::i64
) {
4185 CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, MVT::Glue
,
4187 CurDAG
->SelectNodeTo(N
, PPC::ADDZE8
, MVT::i64
, SDValue(Op
, 0),
4191 assert(N
->getValueType(0) == MVT::i32
&&
4192 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4194 CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, MVT::Glue
,
4196 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
, SDValue(Op
, 0),
4203 // Change TLS initial-exec D-form stores to X-form stores.
4204 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
4205 if (EnableTLSOpt
&& PPCSubTarget
->isELFv2ABI() &&
4206 ST
->getAddressingMode() != ISD::PRE_INC
)
4207 if (tryTLSXFormStore(ST
))
4212 // Handle preincrement loads.
4213 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
4214 EVT LoadedVT
= LD
->getMemoryVT();
4216 // Normal loads are handled by code generated from the .td file.
4217 if (LD
->getAddressingMode() != ISD::PRE_INC
) {
4218 // Change TLS initial-exec D-form loads to X-form loads.
4219 if (EnableTLSOpt
&& PPCSubTarget
->isELFv2ABI())
4220 if (tryTLSXFormLoad(LD
))
4225 SDValue Offset
= LD
->getOffset();
4226 if (Offset
.getOpcode() == ISD::TargetConstant
||
4227 Offset
.getOpcode() == ISD::TargetGlobalAddress
) {
4230 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
4231 if (LD
->getValueType(0) != MVT::i64
) {
4232 // Handle PPC32 integer and normal FP loads.
4233 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4234 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4235 default: llvm_unreachable("Invalid PPC load type!");
4236 case MVT::f64
: Opcode
= PPC::LFDU
; break;
4237 case MVT::f32
: Opcode
= PPC::LFSU
; break;
4238 case MVT::i32
: Opcode
= PPC::LWZU
; break;
4239 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU
: PPC::LHZU
; break;
4241 case MVT::i8
: Opcode
= PPC::LBZU
; break;
4244 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
4245 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4246 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4247 default: llvm_unreachable("Invalid PPC load type!");
4248 case MVT::i64
: Opcode
= PPC::LDU
; break;
4249 case MVT::i32
: Opcode
= PPC::LWZU8
; break;
4250 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU8
: PPC::LHZU8
; break;
4252 case MVT::i8
: Opcode
= PPC::LBZU8
; break;
4256 SDValue Chain
= LD
->getChain();
4257 SDValue Base
= LD
->getBasePtr();
4258 SDValue Ops
[] = { Offset
, Base
, Chain
};
4259 SDNode
*MN
= CurDAG
->getMachineNode(
4260 Opcode
, dl
, LD
->getValueType(0),
4261 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
4262 transferMemOperands(N
, MN
);
4267 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
4268 if (LD
->getValueType(0) != MVT::i64
) {
4269 // Handle PPC32 integer and normal FP loads.
4270 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4271 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4272 default: llvm_unreachable("Invalid PPC load type!");
4273 case MVT::v4f64
: Opcode
= PPC::QVLFDUX
; break; // QPX
4274 case MVT::v4f32
: Opcode
= PPC::QVLFSUX
; break; // QPX
4275 case MVT::f64
: Opcode
= PPC::LFDUX
; break;
4276 case MVT::f32
: Opcode
= PPC::LFSUX
; break;
4277 case MVT::i32
: Opcode
= PPC::LWZUX
; break;
4278 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX
: PPC::LHZUX
; break;
4280 case MVT::i8
: Opcode
= PPC::LBZUX
; break;
4283 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
4284 assert((!isSExt
|| LoadedVT
== MVT::i16
|| LoadedVT
== MVT::i32
) &&
4285 "Invalid sext update load");
4286 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4287 default: llvm_unreachable("Invalid PPC load type!");
4288 case MVT::i64
: Opcode
= PPC::LDUX
; break;
4289 case MVT::i32
: Opcode
= isSExt
? PPC::LWAUX
: PPC::LWZUX8
; break;
4290 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX8
: PPC::LHZUX8
; break;
4292 case MVT::i8
: Opcode
= PPC::LBZUX8
; break;
4296 SDValue Chain
= LD
->getChain();
4297 SDValue Base
= LD
->getBasePtr();
4298 SDValue Ops
[] = { Base
, Offset
, Chain
};
4299 SDNode
*MN
= CurDAG
->getMachineNode(
4300 Opcode
, dl
, LD
->getValueType(0),
4301 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
4302 transferMemOperands(N
, MN
);
4309 unsigned Imm
, Imm2
, SH
, MB
, ME
;
4312 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4313 // with a mask, emit rlwinm
4314 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4315 isRotateAndMask(N
->getOperand(0).getNode(), Imm
, false, SH
, MB
, ME
)) {
4316 SDValue Val
= N
->getOperand(0).getOperand(0);
4317 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4318 getI32Imm(ME
, dl
) };
4319 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4322 // If this is just a masked value where the input is not handled above, and
4323 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4324 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4325 isRunOfOnes(Imm
, MB
, ME
) &&
4326 N
->getOperand(0).getOpcode() != ISD::ROTL
) {
4327 SDValue Val
= N
->getOperand(0);
4328 SDValue Ops
[] = { Val
, getI32Imm(0, dl
), getI32Imm(MB
, dl
),
4329 getI32Imm(ME
, dl
) };
4330 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4333 // If this is a 64-bit zero-extension mask, emit rldicl.
4334 if (isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) &&
4336 SDValue Val
= N
->getOperand(0);
4337 MB
= 64 - countTrailingOnes(Imm64
);
4340 if (Val
.getOpcode() == ISD::ANY_EXTEND
) {
4341 auto Op0
= Val
.getOperand(0);
4342 if ( Op0
.getOpcode() == ISD::SRL
&&
4343 isInt32Immediate(Op0
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
4345 auto ResultType
= Val
.getNode()->getValueType(0);
4346 auto ImDef
= CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
,
4348 SDValue
IDVal (ImDef
, 0);
4350 Val
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
,
4351 ResultType
, IDVal
, Op0
.getOperand(0),
4352 getI32Imm(1, dl
)), 0);
4357 // If the operand is a logical right shift, we can fold it into this
4358 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4359 // for n <= mb. The right shift is really a left rotate followed by a
4360 // mask, and this mask is a more-restrictive sub-mask of the mask implied
4362 if (Val
.getOpcode() == ISD::SRL
&&
4363 isInt32Immediate(Val
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
4364 assert(Imm
< 64 && "Illegal shift amount");
4365 Val
= Val
.getOperand(0);
4369 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
) };
4370 CurDAG
->SelectNodeTo(N
, PPC::RLDICL
, MVT::i64
, Ops
);
4373 // If this is a negated 64-bit zero-extension mask,
4374 // i.e. the immediate is a sequence of ones from most significant side
4375 // and all zero for reminder, we should use rldicr.
4376 if (isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) &&
4377 isMask_64(~Imm64
)) {
4378 SDValue Val
= N
->getOperand(0);
4379 MB
= 63 - countTrailingOnes(~Imm64
);
4381 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
) };
4382 CurDAG
->SelectNodeTo(N
, PPC::RLDICR
, MVT::i64
, Ops
);
4386 // AND X, 0 -> 0, not "rlwinm 32".
4387 if (isInt32Immediate(N
->getOperand(1), Imm
) && (Imm
== 0)) {
4388 ReplaceUses(SDValue(N
, 0), N
->getOperand(1));
4391 // ISD::OR doesn't get all the bitfield insertion fun.
4392 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4394 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4395 N
->getOperand(0).getOpcode() == ISD::OR
&&
4396 isInt32Immediate(N
->getOperand(0).getOperand(1), Imm2
)) {
4397 // The idea here is to check whether this is equivalent to:
4398 // (c1 & m) | (x & ~m)
4399 // where m is a run-of-ones mask. The logic here is that, for each bit in
4401 // - if both are 1, then the output will be 1.
4402 // - if both are 0, then the output will be 0.
4403 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4405 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4407 // If that last condition is never the case, then we can form m from the
4408 // bits that are the same between c1 and c2.
4410 if (isRunOfOnes(~(Imm
^Imm2
), MB
, ME
) && !(~Imm
& Imm2
)) {
4411 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4412 N
->getOperand(0).getOperand(1),
4413 getI32Imm(0, dl
), getI32Imm(MB
, dl
),
4414 getI32Imm(ME
, dl
) };
4415 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
4420 // Other cases are autogenerated.
4424 if (N
->getValueType(0) == MVT::i32
)
4425 if (tryBitfieldInsert(N
))
4429 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
4430 isIntS16Immediate(N
->getOperand(1), Imm
)) {
4432 CurDAG
->computeKnownBits(N
->getOperand(0), LHSKnown
);
4434 // If this is equivalent to an add, then we can fold it with the
4435 // FrameIndex calculation.
4436 if ((LHSKnown
.Zero
.getZExtValue()|~(uint64_t)Imm
) == ~0ULL) {
4437 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
4442 // OR with a 32-bit immediate can be handled by ori + oris
4443 // without creating an immediate in a GPR.
4445 bool IsPPC64
= PPCSubTarget
->isPPC64();
4446 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
4447 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
4448 // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
4449 uint64_t ImmHi
= Imm64
>> 16;
4450 uint64_t ImmLo
= Imm64
& 0xFFFF;
4451 if (ImmHi
!= 0 && ImmLo
!= 0) {
4452 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
4454 getI16Imm(ImmLo
, dl
));
4455 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
4456 CurDAG
->SelectNodeTo(N
, PPC::ORIS8
, MVT::i64
, Ops1
);
4461 // Other cases are autogenerated.
4465 // XOR with a 32-bit immediate can be handled by xori + xoris
4466 // without creating an immediate in a GPR.
4468 bool IsPPC64
= PPCSubTarget
->isPPC64();
4469 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
4470 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
4471 // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
4472 uint64_t ImmHi
= Imm64
>> 16;
4473 uint64_t ImmLo
= Imm64
& 0xFFFF;
4474 if (ImmHi
!= 0 && ImmLo
!= 0) {
4475 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
4477 getI16Imm(ImmLo
, dl
));
4478 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
4479 CurDAG
->SelectNodeTo(N
, PPC::XORIS8
, MVT::i64
, Ops1
);
4488 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
4489 isIntS16Immediate(N
->getOperand(1), Imm
)) {
4490 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
4497 unsigned Imm
, SH
, MB
, ME
;
4498 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
4499 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
4500 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4501 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4502 getI32Imm(ME
, dl
) };
4503 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4507 // Other cases are autogenerated.
4511 unsigned Imm
, SH
, MB
, ME
;
4512 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
4513 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
4514 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4515 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4516 getI32Imm(ME
, dl
) };
4517 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4521 // Other cases are autogenerated.
4524 // FIXME: Remove this once the ANDI glue bug is fixed:
4525 case PPCISD::ANDIo_1_EQ_BIT
:
4526 case PPCISD::ANDIo_1_GT_BIT
: {
4530 EVT InVT
= N
->getOperand(0).getValueType();
4531 assert((InVT
== MVT::i64
|| InVT
== MVT::i32
) &&
4532 "Invalid input type for ANDIo_1_EQ_BIT");
4534 unsigned Opcode
= (InVT
== MVT::i64
) ? PPC::ANDIo8
: PPC::ANDIo
;
4535 SDValue
AndI(CurDAG
->getMachineNode(Opcode
, dl
, InVT
, MVT::Glue
,
4537 CurDAG
->getTargetConstant(1, dl
, InVT
)),
4539 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
4541 CurDAG
->getTargetConstant(N
->getOpcode() == PPCISD::ANDIo_1_EQ_BIT
?
4542 PPC::sub_eq
: PPC::sub_gt
, dl
, MVT::i32
);
4544 CurDAG
->SelectNodeTo(N
, TargetOpcode::EXTRACT_SUBREG
, MVT::i1
, CR0Reg
,
4545 SRIdxVal
, SDValue(AndI
.getNode(), 1) /* glue */);
4548 case ISD::SELECT_CC
: {
4549 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(4))->get();
4551 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
4552 bool isPPC64
= (PtrVT
== MVT::i64
);
4554 // If this is a select of i1 operands, we'll pattern match it.
4555 if (PPCSubTarget
->useCRBits() &&
4556 N
->getOperand(0).getValueType() == MVT::i1
)
4559 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
4561 if (ConstantSDNode
*N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1)))
4562 if (ConstantSDNode
*N2C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(2)))
4563 if (ConstantSDNode
*N3C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(3)))
4564 if (N1C
->isNullValue() && N3C
->isNullValue() &&
4565 N2C
->getZExtValue() == 1ULL && CC
== ISD::SETNE
&&
4566 // FIXME: Implement this optzn for PPC64.
4567 N
->getValueType(0) == MVT::i32
) {
4569 CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4570 N
->getOperand(0), getI32Imm(~0U, dl
));
4571 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(Tmp
, 0),
4572 N
->getOperand(0), SDValue(Tmp
, 1));
4576 SDValue CCReg
= SelectCC(N
->getOperand(0), N
->getOperand(1), CC
, dl
);
4578 if (N
->getValueType(0) == MVT::i1
) {
4579 // An i1 select is: (c & t) | (!c & f).
4581 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
4585 default: llvm_unreachable("Invalid CC index");
4586 case 0: SRI
= PPC::sub_lt
; break;
4587 case 1: SRI
= PPC::sub_gt
; break;
4588 case 2: SRI
= PPC::sub_eq
; break;
4589 case 3: SRI
= PPC::sub_un
; break;
4592 SDValue CCBit
= CurDAG
->getTargetExtractSubreg(SRI
, dl
, MVT::i1
, CCReg
);
4594 SDValue
NotCCBit(CurDAG
->getMachineNode(PPC::CRNOR
, dl
, MVT::i1
,
4596 SDValue C
= Inv
? NotCCBit
: CCBit
,
4597 NotC
= Inv
? CCBit
: NotCCBit
;
4599 SDValue
CAndT(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
4600 C
, N
->getOperand(2)), 0);
4601 SDValue
NotCAndF(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
4602 NotC
, N
->getOperand(3)), 0);
4604 CurDAG
->SelectNodeTo(N
, PPC::CROR
, MVT::i1
, CAndT
, NotCAndF
);
4608 unsigned BROpc
= getPredicateForSetCC(CC
);
4610 unsigned SelectCCOp
;
4611 if (N
->getValueType(0) == MVT::i32
)
4612 SelectCCOp
= PPC::SELECT_CC_I4
;
4613 else if (N
->getValueType(0) == MVT::i64
)
4614 SelectCCOp
= PPC::SELECT_CC_I8
;
4615 else if (N
->getValueType(0) == MVT::f32
) {
4616 if (PPCSubTarget
->hasP8Vector())
4617 SelectCCOp
= PPC::SELECT_CC_VSSRC
;
4618 else if (PPCSubTarget
->hasSPE())
4619 SelectCCOp
= PPC::SELECT_CC_SPE4
;
4621 SelectCCOp
= PPC::SELECT_CC_F4
;
4622 } else if (N
->getValueType(0) == MVT::f64
) {
4623 if (PPCSubTarget
->hasVSX())
4624 SelectCCOp
= PPC::SELECT_CC_VSFRC
;
4625 else if (PPCSubTarget
->hasSPE())
4626 SelectCCOp
= PPC::SELECT_CC_SPE
;
4628 SelectCCOp
= PPC::SELECT_CC_F8
;
4629 } else if (N
->getValueType(0) == MVT::f128
)
4630 SelectCCOp
= PPC::SELECT_CC_F16
;
4631 else if (PPCSubTarget
->hasSPE())
4632 SelectCCOp
= PPC::SELECT_CC_SPE
;
4633 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4f64
)
4634 SelectCCOp
= PPC::SELECT_CC_QFRC
;
4635 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4f32
)
4636 SelectCCOp
= PPC::SELECT_CC_QSRC
;
4637 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4i1
)
4638 SelectCCOp
= PPC::SELECT_CC_QBRC
;
4639 else if (N
->getValueType(0) == MVT::v2f64
||
4640 N
->getValueType(0) == MVT::v2i64
)
4641 SelectCCOp
= PPC::SELECT_CC_VSRC
;
4643 SelectCCOp
= PPC::SELECT_CC_VRRC
;
4645 SDValue Ops
[] = { CCReg
, N
->getOperand(2), N
->getOperand(3),
4646 getI32Imm(BROpc
, dl
) };
4647 CurDAG
->SelectNodeTo(N
, SelectCCOp
, N
->getValueType(0), Ops
);
4650 case ISD::VECTOR_SHUFFLE
:
4651 if (PPCSubTarget
->hasVSX() && (N
->getValueType(0) == MVT::v2f64
||
4652 N
->getValueType(0) == MVT::v2i64
)) {
4653 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(N
);
4655 SDValue Op1
= N
->getOperand(SVN
->getMaskElt(0) < 2 ? 0 : 1),
4656 Op2
= N
->getOperand(SVN
->getMaskElt(1) < 2 ? 0 : 1);
4659 for (int i
= 0; i
< 2; ++i
)
4660 if (SVN
->getMaskElt(i
) <= 0 || SVN
->getMaskElt(i
) == 2)
4665 if (Op1
== Op2
&& DM
[0] == 0 && DM
[1] == 0 &&
4666 Op1
.getOpcode() == ISD::SCALAR_TO_VECTOR
&&
4667 isa
<LoadSDNode
>(Op1
.getOperand(0))) {
4668 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op1
.getOperand(0));
4669 SDValue Base
, Offset
;
4671 if (LD
->isUnindexed() && LD
->hasOneUse() && Op1
.hasOneUse() &&
4672 (LD
->getMemoryVT() == MVT::f64
||
4673 LD
->getMemoryVT() == MVT::i64
) &&
4674 SelectAddrIdxOnly(LD
->getBasePtr(), Base
, Offset
)) {
4675 SDValue Chain
= LD
->getChain();
4676 SDValue Ops
[] = { Base
, Offset
, Chain
};
4677 MachineMemOperand
*MemOp
= LD
->getMemOperand();
4678 SDNode
*NewN
= CurDAG
->SelectNodeTo(N
, PPC::LXVDSX
,
4679 N
->getValueType(0), Ops
);
4680 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(NewN
), {MemOp
});
4685 // For little endian, we must swap the input operands and adjust
4686 // the mask elements (reverse and invert them).
4687 if (PPCSubTarget
->isLittleEndian()) {
4688 std::swap(Op1
, Op2
);
4689 unsigned tmp
= DM
[0];
4694 SDValue DMV
= CurDAG
->getTargetConstant(DM
[1] | (DM
[0] << 1), dl
,
4696 SDValue Ops
[] = { Op1
, Op2
, DMV
};
4697 CurDAG
->SelectNodeTo(N
, PPC::XXPERMDI
, N
->getValueType(0), Ops
);
4704 bool IsPPC64
= PPCSubTarget
->isPPC64();
4705 SDValue Ops
[] = { N
->getOperand(1), N
->getOperand(0) };
4706 CurDAG
->SelectNodeTo(N
, N
->getOpcode() == PPCISD::BDNZ
4707 ? (IsPPC64
? PPC::BDNZ8
: PPC::BDNZ
)
4708 : (IsPPC64
? PPC::BDZ8
: PPC::BDZ
),
4712 case PPCISD::COND_BRANCH
: {
4713 // Op #0 is the Chain.
4714 // Op #1 is the PPC::PRED_* number.
4716 // Op #3 is the Dest MBB
4717 // Op #4 is the Flag.
4718 // Prevent PPC::PRED_* from being selected into LI.
4719 unsigned PCC
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
4720 if (EnableBranchHint
)
4721 PCC
|= getBranchHint(PCC
, FuncInfo
, N
->getOperand(3));
4723 SDValue Pred
= getI32Imm(PCC
, dl
);
4724 SDValue Ops
[] = { Pred
, N
->getOperand(2), N
->getOperand(3),
4725 N
->getOperand(0), N
->getOperand(4) };
4726 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
4730 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(1))->get();
4731 unsigned PCC
= getPredicateForSetCC(CC
);
4733 if (N
->getOperand(2).getValueType() == MVT::i1
) {
4737 default: llvm_unreachable("Unexpected Boolean-operand predicate");
4738 case PPC::PRED_LT
: Opc
= PPC::CRANDC
; Swap
= true; break;
4739 case PPC::PRED_LE
: Opc
= PPC::CRORC
; Swap
= true; break;
4740 case PPC::PRED_EQ
: Opc
= PPC::CREQV
; Swap
= false; break;
4741 case PPC::PRED_GE
: Opc
= PPC::CRORC
; Swap
= false; break;
4742 case PPC::PRED_GT
: Opc
= PPC::CRANDC
; Swap
= false; break;
4743 case PPC::PRED_NE
: Opc
= PPC::CRXOR
; Swap
= false; break;
4746 SDValue
BitComp(CurDAG
->getMachineNode(Opc
, dl
, MVT::i1
,
4747 N
->getOperand(Swap
? 3 : 2),
4748 N
->getOperand(Swap
? 2 : 3)), 0);
4749 CurDAG
->SelectNodeTo(N
, PPC::BC
, MVT::Other
, BitComp
, N
->getOperand(4),
4754 if (EnableBranchHint
)
4755 PCC
|= getBranchHint(PCC
, FuncInfo
, N
->getOperand(4));
4757 SDValue CondCode
= SelectCC(N
->getOperand(2), N
->getOperand(3), CC
, dl
);
4758 SDValue Ops
[] = { getI32Imm(PCC
, dl
), CondCode
,
4759 N
->getOperand(4), N
->getOperand(0) };
4760 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
4764 // FIXME: Should custom lower this.
4765 SDValue Chain
= N
->getOperand(0);
4766 SDValue Target
= N
->getOperand(1);
4767 unsigned Opc
= Target
.getValueType() == MVT::i32
? PPC::MTCTR
: PPC::MTCTR8
;
4768 unsigned Reg
= Target
.getValueType() == MVT::i32
? PPC::BCTR
: PPC::BCTR8
;
4769 Chain
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Glue
, Target
,
4771 CurDAG
->SelectNodeTo(N
, Reg
, MVT::Other
, Chain
);
4774 case PPCISD::TOC_ENTRY
: {
4775 assert ((PPCSubTarget
->isPPC64() || PPCSubTarget
->isSVR4ABI()) &&
4776 "Only supported for 64-bit ABI and 32-bit SVR4");
4777 if (PPCSubTarget
->isSVR4ABI() && !PPCSubTarget
->isPPC64()) {
4778 SDValue GA
= N
->getOperand(0);
4779 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LWZtoc
, dl
, MVT::i32
, GA
,
4781 transferMemOperands(N
, MN
);
4786 // For medium and large code model, we generate two instructions as
4787 // described below. Otherwise we allow SelectCodeCommon to handle this,
4788 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
4789 CodeModel::Model CModel
= TM
.getCodeModel();
4790 if (CModel
!= CodeModel::Medium
&& CModel
!= CodeModel::Large
)
4793 // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
4794 // If it must be toc-referenced according to PPCSubTarget, we generate:
4795 // LDtocL(@sym, ADDIStocHA(%x2, @sym))
4796 // Otherwise we generate:
4797 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
4798 SDValue GA
= N
->getOperand(0);
4799 SDValue TOCbase
= N
->getOperand(1);
4800 SDNode
*Tmp
= CurDAG
->getMachineNode(PPC::ADDIStocHA
, dl
, MVT::i64
,
4803 if (isa
<JumpTableSDNode
>(GA
) || isa
<BlockAddressSDNode
>(GA
) ||
4804 CModel
== CodeModel::Large
) {
4805 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LDtocL
, dl
, MVT::i64
, GA
,
4807 transferMemOperands(N
, MN
);
4812 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(GA
)) {
4813 const GlobalValue
*GV
= G
->getGlobal();
4814 unsigned char GVFlags
= PPCSubTarget
->classifyGlobalReference(GV
);
4815 if (GVFlags
& PPCII::MO_NLP_FLAG
) {
4816 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LDtocL
, dl
, MVT::i64
, GA
,
4818 transferMemOperands(N
, MN
);
4824 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ADDItocL
, dl
, MVT::i64
,
4825 SDValue(Tmp
, 0), GA
));
4828 case PPCISD::PPC32_PICGOT
:
4829 // Generate a PIC-safe GOT reference.
4830 assert(!PPCSubTarget
->isPPC64() && PPCSubTarget
->isSVR4ABI() &&
4831 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
4832 CurDAG
->SelectNodeTo(N
, PPC::PPC32PICGOT
,
4833 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()),
4837 case PPCISD::VADD_SPLAT
: {
4838 // This expands into one of three sequences, depending on whether
4839 // the first operand is odd or even, positive or negative.
4840 assert(isa
<ConstantSDNode
>(N
->getOperand(0)) &&
4841 isa
<ConstantSDNode
>(N
->getOperand(1)) &&
4842 "Invalid operand on VADD_SPLAT!");
4844 int Elt
= N
->getConstantOperandVal(0);
4845 int EltSize
= N
->getConstantOperandVal(1);
4846 unsigned Opc1
, Opc2
, Opc3
;
4850 Opc1
= PPC::VSPLTISB
;
4851 Opc2
= PPC::VADDUBM
;
4852 Opc3
= PPC::VSUBUBM
;
4854 } else if (EltSize
== 2) {
4855 Opc1
= PPC::VSPLTISH
;
4856 Opc2
= PPC::VADDUHM
;
4857 Opc3
= PPC::VSUBUHM
;
4860 assert(EltSize
== 4 && "Invalid element size on VADD_SPLAT!");
4861 Opc1
= PPC::VSPLTISW
;
4862 Opc2
= PPC::VADDUWM
;
4863 Opc3
= PPC::VSUBUWM
;
4867 if ((Elt
& 1) == 0) {
4868 // Elt is even, in the range [-32,-18] + [16,30].
4870 // Convert: VADD_SPLAT elt, size
4871 // Into: tmp = VSPLTIS[BHW] elt
4872 // VADDU[BHW]M tmp, tmp
4873 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
4874 SDValue EltVal
= getI32Imm(Elt
>> 1, dl
);
4875 SDNode
*Tmp
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
4876 SDValue TmpVal
= SDValue(Tmp
, 0);
4877 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, TmpVal
, TmpVal
));
4879 } else if (Elt
> 0) {
4880 // Elt is odd and positive, in the range [17,31].
4882 // Convert: VADD_SPLAT elt, size
4883 // Into: tmp1 = VSPLTIS[BHW] elt-16
4884 // tmp2 = VSPLTIS[BHW] -16
4885 // VSUBU[BHW]M tmp1, tmp2
4886 SDValue EltVal
= getI32Imm(Elt
- 16, dl
);
4887 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
4888 EltVal
= getI32Imm(-16, dl
);
4889 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
4890 ReplaceNode(N
, CurDAG
->getMachineNode(Opc3
, dl
, VT
, SDValue(Tmp1
, 0),
4894 // Elt is odd and negative, in the range [-31,-17].
4896 // Convert: VADD_SPLAT elt, size
4897 // Into: tmp1 = VSPLTIS[BHW] elt+16
4898 // tmp2 = VSPLTIS[BHW] -16
4899 // VADDU[BHW]M tmp1, tmp2
4900 SDValue EltVal
= getI32Imm(Elt
+ 16, dl
);
4901 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
4902 EltVal
= getI32Imm(-16, dl
);
4903 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
4904 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, SDValue(Tmp1
, 0),
4910 assert(PPCSubTarget
->hasP9Vector() && "ABS is supported with P9 Vector");
4912 // For vector absolute difference, we use VABSDUW instruction of POWER9.
4913 // Since VABSDU instructions are for unsigned integers, we need adjustment
4914 // for signed integers.
4915 // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
4916 // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
4917 // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
4918 EVT VecVT
= N
->getOperand(0).getValueType();
4919 SDNode
*AbsOp
= nullptr;
4922 if (VecVT
== MVT::v4i32
)
4923 AbsOpcode
= PPC::VABSDUW
;
4924 else if (VecVT
== MVT::v8i16
)
4925 AbsOpcode
= PPC::VABSDUH
;
4926 else if (VecVT
== MVT::v16i8
)
4927 AbsOpcode
= PPC::VABSDUB
;
4929 llvm_unreachable("Unsupported vector data type for ISD::ABS");
4931 // Even for signed integers, we can skip adjustment if all values are
4932 // known to be positive (as signed integer) due to zero-extended inputs.
4933 if (N
->getOperand(0).getOpcode() == ISD::SUB
&&
4934 N
->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND
&&
4935 N
->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND
) {
4936 AbsOp
= CurDAG
->getMachineNode(AbsOpcode
, dl
, VecVT
,
4937 SDValue(N
->getOperand(0)->getOperand(0)),
4938 SDValue(N
->getOperand(0)->getOperand(1)));
4939 ReplaceNode(N
, AbsOp
);
4942 if (N
->getOperand(0).getOpcode() == ISD::SUB
) {
4943 SDValue SubVal
= N
->getOperand(0);
4944 SDNode
*Op0
= flipSignBit(SubVal
->getOperand(0));
4945 SDNode
*Op1
= flipSignBit(SubVal
->getOperand(1));
4946 AbsOp
= CurDAG
->getMachineNode(AbsOpcode
, dl
, VecVT
,
4947 SDValue(Op0
, 0), SDValue(Op1
, 0));
4950 SDNode
*Op1
= nullptr;
4951 SDNode
*Op0
= flipSignBit(N
->getOperand(0), &Op1
);
4952 AbsOp
= CurDAG
->getMachineNode(AbsOpcode
, dl
, VecVT
, SDValue(Op0
, 0),
4955 ReplaceNode(N
, AbsOp
);
4963 // If the target supports the cmpb instruction, do the idiom recognition here.
4964 // We don't do this as a DAG combine because we don't want to do it as nodes
4965 // are being combined (because we might miss part of the eventual idiom). We
4966 // don't want to do it during instruction selection because we want to reuse
4967 // the logic for lowering the masking operations already part of the
4968 // instruction selector.
// NOTE(review): this copy is a line-shattered extraction and interior source
// lines are missing (see the gaps in the embedded original line numbers).
// Restore this function from upstream before making any code change.
//
// Recognizes an OR tree of per-byte SELECT_CC compares and replaces it with a
// single PPCISD::CMPB node, then applies the accumulated Mask constant and/or
// merges the Alt constant via the masked-merge identity below.
4969 SDValue
PPCDAGToDAGISel::combineToCMPB(SDNode
*N
) {
4972 assert(N
->getOpcode() == ISD::OR
&&
4973 "Only OR nodes are supported for CMPB");
4976 if (!PPCSubTarget
->hasCMPB())
4979 if (N
->getValueType(0) != MVT::i32
&&
4980 N
->getValueType(0) != MVT::i64
)
4983 EVT VT
= N
->getValueType(0);
// Per-byte match state plus the select constants accumulated into Mask/Alt.
4986 bool BytesFound
[8] = {false, false, false, false, false, false, false, false};
4987 uint64_t Mask
= 0, Alt
= 0;
// Lambda: recognize one SELECT_CC that selects a single byte's worth of
// constant (folded into Mask/Alt) based on comparing one byte of LHS/RHS.
4989 auto IsByteSelectCC
= [this](SDValue O
, unsigned &b
,
4990 uint64_t &Mask
, uint64_t &Alt
,
4991 SDValue
&LHS
, SDValue
&RHS
) {
4992 if (O
.getOpcode() != ISD::SELECT_CC
)
4994 ISD::CondCode CC
= cast
<CondCodeSDNode
>(O
.getOperand(4))->get();
4996 if (!isa
<ConstantSDNode
>(O
.getOperand(2)) ||
4997 !isa
<ConstantSDNode
>(O
.getOperand(3)))
5000 uint64_t PM
= O
.getConstantOperandVal(2);
5001 uint64_t PAlt
= O
.getConstantOperandVal(3);
5002 for (b
= 0; b
< 8; ++b
) {
5003 uint64_t Mask
= UINT64_C(0xFF) << (8*b
);
5004 if (PM
&& (PM
& Mask
) == PM
&& (PAlt
& Mask
) == PAlt
)
5013 if (!isa
<ConstantSDNode
>(O
.getOperand(1)) ||
5014 O
.getConstantOperandVal(1) != 0) {
5015 SDValue Op0
= O
.getOperand(0), Op1
= O
.getOperand(1);
5016 if (Op0
.getOpcode() == ISD::TRUNCATE
)
5017 Op0
= Op0
.getOperand(0);
5018 if (Op1
.getOpcode() == ISD::TRUNCATE
)
5019 Op1
= Op1
.getOperand(0);
5021 if (Op0
.getOpcode() == ISD::SRL
&& Op1
.getOpcode() == ISD::SRL
&&
5022 Op0
.getOperand(1) == Op1
.getOperand(1) && CC
== ISD::SETEQ
&&
5023 isa
<ConstantSDNode
>(Op0
.getOperand(1))) {
5025 unsigned Bits
= Op0
.getValueSizeInBits();
5028 if (Op0
.getConstantOperandVal(1) != Bits
-8)
5031 LHS
= Op0
.getOperand(0);
5032 RHS
= Op1
.getOperand(0);
5036 // When we have small integers (i16 to be specific), the form present
5037 // post-legalization uses SETULT in the SELECT_CC for the
5038 // higher-order byte, depending on the fact that the
5039 // even-higher-order bytes are known to all be zero, for example:
5040 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
5041 // (so when the second byte is the same, because all higher-order
5042 // bits from bytes 3 and 4 are known to be zero, the result of the
5043 // xor can be at most 255)
5044 if (Op0
.getOpcode() == ISD::XOR
&& CC
== ISD::SETULT
&&
5045 isa
<ConstantSDNode
>(O
.getOperand(1))) {
5047 uint64_t ULim
= O
.getConstantOperandVal(1);
5048 if (ULim
!= (UINT64_C(1) << b
*8))
5051 // Now we need to make sure that the upper bytes are known to be
5053 unsigned Bits
= Op0
.getValueSizeInBits();
5054 if (!CurDAG
->MaskedValueIsZero(
5055 Op0
, APInt::getHighBitsSet(Bits
, Bits
- (b
+ 1) * 8)))
5058 LHS
= Op0
.getOperand(0);
5059 RHS
= Op0
.getOperand(1);
5066 if (CC
!= ISD::SETEQ
)
5069 SDValue Op
= O
.getOperand(0);
5070 if (Op
.getOpcode() == ISD::AND
) {
5071 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
5073 if (Op
.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b
)))
5076 SDValue XOR
= Op
.getOperand(0);
5077 if (XOR
.getOpcode() == ISD::TRUNCATE
)
5078 XOR
= XOR
.getOperand(0);
5079 if (XOR
.getOpcode() != ISD::XOR
)
5082 LHS
= XOR
.getOperand(0);
5083 RHS
= XOR
.getOperand(1);
5085 } else if (Op
.getOpcode() == ISD::SRL
) {
5086 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
5088 unsigned Bits
= Op
.getValueSizeInBits();
5091 if (Op
.getConstantOperandVal(1) != Bits
-8)
5094 SDValue XOR
= Op
.getOperand(0);
5095 if (XOR
.getOpcode() == ISD::TRUNCATE
)
5096 XOR
= XOR
.getOperand(0);
5097 if (XOR
.getOpcode() != ISD::XOR
)
5100 LHS
= XOR
.getOperand(0);
5101 RHS
= XOR
.getOperand(1);
// Worklist walk over the OR tree rooted at N, collecting per-byte matches.
5108 SmallVector
<SDValue
, 8> Queue(1, SDValue(N
, 0));
5109 while (!Queue
.empty()) {
5110 SDValue V
= Queue
.pop_back_val();
5112 for (const SDValue
&O
: V
.getNode()->ops()) {
5114 uint64_t M
= 0, A
= 0;
5116 if (O
.getOpcode() == ISD::OR
) {
5118 } else if (IsByteSelectCC(O
, b
, M
, A
, OLHS
, ORHS
)) {
5122 BytesFound
[b
] = true;
5125 } else if ((LHS
== ORHS
&& RHS
== OLHS
) ||
5126 (RHS
== ORHS
&& LHS
== OLHS
)) {
5127 BytesFound
[b
] = true;
// Bail out unless enough bytes matched (see the LastB/BCnt check below).
5139 unsigned LastB
= 0, BCnt
= 0;
5140 for (unsigned i
= 0; i
< 8; ++i
)
5141 if (BytesFound
[LastB
]) {
5146 if (!LastB
|| BCnt
< 2)
5149 // Because we'll be zero-extending the output anyway if we don't have a specific
5150 // value for each input byte (via the Mask), we can 'anyext' the inputs.
5151 if (LHS
.getValueType() != VT
) {
5152 LHS
= CurDAG
->getAnyExtOrTrunc(LHS
, dl
, VT
);
5153 RHS
= CurDAG
->getAnyExtOrTrunc(RHS
, dl
, VT
);
5156 Res
= CurDAG
->getNode(PPCISD::CMPB
, dl
, VT
, LHS
, RHS
);
// Post-process the CMPB result: AND with Mask, and/or merge in Alt using the
// masked-merge identity documented below.
5158 bool NonTrivialMask
= ((int64_t) Mask
) != INT64_C(-1);
5159 if (NonTrivialMask
&& !Alt
) {
5160 // Res = Mask & CMPB
5161 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
5162 CurDAG
->getConstant(Mask
, dl
, VT
));
5164 // Res = (CMPB & Mask) | (~CMPB & Alt)
5165 // Which, as suggested here:
5166 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
5167 // can be written as:
5168 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
5169 // useful because the (Alt ^ Mask) can be pre-computed.
5170 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
5171 CurDAG
->getConstant(Mask
^ Alt
, dl
, VT
));
5172 Res
= CurDAG
->getNode(ISD::XOR
, dl
, VT
, Res
,
5173 CurDAG
->getConstant(Alt
, dl
, VT
));
5179 // When CR bit registers are enabled, an extension of an i1 variable to a i32
5180 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5181 // involves constant materialization of a 0 or a 1 or both. If the result of
5182 // the extension is then operated upon by some operator that can be constant
5183 // folded with a constant 0 or 1, and that constant can be materialized using
5184 // only one instruction (like a zero or one), then we should fold in those
5185 // operations with the select.
// NOTE(review): this copy is a line-shattered extraction and interior source
// lines are missing (see the gaps in the embedded original line numbers).
// Restore this function from upstream before making any code change.
//
// Folds an i1 zero/sign/any-extend into its single user: constant-folds the
// user with the extension's true/false constants substituted, then rebuilds
// the user as a select on the i1 condition, repeating while the new node
// still has exactly one use.
5186 void PPCDAGToDAGISel::foldBoolExts(SDValue
&Res
, SDNode
*&N
) {
5187 if (!PPCSubTarget
->useCRBits())
5190 if (N
->getOpcode() != ISD::ZERO_EXTEND
&&
5191 N
->getOpcode() != ISD::SIGN_EXTEND
&&
5192 N
->getOpcode() != ISD::ANY_EXTEND
)
5195 if (N
->getOperand(0).getValueType() != MVT::i1
)
5198 if (!N
->hasOneUse())
5202 EVT VT
= N
->getValueType(0);
5203 SDValue Cond
= N
->getOperand(0);
5205 CurDAG
->getConstant(N
->getOpcode() == ISD::SIGN_EXTEND
? -1 : 1, dl
, VT
);
5206 SDValue ConstFalse
= CurDAG
->getConstant(0, dl
, VT
);
5209 SDNode
*User
= *N
->use_begin();
5210 if (User
->getNumOperands() != 2)
// Attempt to constant-fold User's operation with Val substituted for N.
5213 auto TryFold
= [this, N
, User
, dl
](SDValue Val
) {
5214 SDValue UserO0
= User
->getOperand(0), UserO1
= User
->getOperand(1);
5215 SDValue O0
= UserO0
.getNode() == N
? Val
: UserO0
;
5216 SDValue O1
= UserO1
.getNode() == N
? Val
: UserO1
;
5218 return CurDAG
->FoldConstantArithmetic(User
->getOpcode(), dl
,
5219 User
->getValueType(0),
5220 O0
.getNode(), O1
.getNode());
5223 // FIXME: When the semantics of the interaction between select and undef
5224 // are clearly defined, it may turn out to be unnecessary to break here.
5225 SDValue TrueRes
= TryFold(ConstTrue
);
5226 if (!TrueRes
|| TrueRes
.isUndef())
5228 SDValue FalseRes
= TryFold(ConstFalse
);
5229 if (!FalseRes
|| FalseRes
.isUndef())
5232 // For us to materialize these using one instruction, we must be able to
5233 // represent them as signed 16-bit integers.
5234 uint64_t True
= cast
<ConstantSDNode
>(TrueRes
)->getZExtValue(),
5235 False
= cast
<ConstantSDNode
>(FalseRes
)->getZExtValue();
5236 if (!isInt
<16>(True
) || !isInt
<16>(False
))
5239 // We can replace User with a new SELECT node, and try again to see if we
5240 // can fold the select with its user.
5241 Res
= CurDAG
->getSelect(dl
, User
->getValueType(0), Cond
, TrueRes
, FalseRes
);
5243 ConstTrue
= TrueRes
;
5244 ConstFalse
= FalseRes
;
5245 } while (N
->hasOneUse());
// NOTE(review): this copy is a line-shattered extraction and interior source
// lines are missing (see the gaps in the embedded original line numbers).
// Restore this function from upstream before making any code change.
//
// Pre-ISel DAG cleanup: walk all nodes from last to first, try the CMPB idiom
// combine and bool-extension folding, and splice replacements in place.
5248 void PPCDAGToDAGISel::PreprocessISelDAG() {
5249 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
5251 bool MadeChange
= false;
5252 while (Position
!= CurDAG
->allnodes_begin()) {
5253 SDNode
*N
= &*--Position
;
5258 switch (N
->getOpcode()) {
5261 Res
= combineToCMPB(N
);
5266 foldBoolExts(Res
, N
);
// Log the rewrite and replace all uses of the old node's value.
5269 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
5270 LLVM_DEBUG(N
->dump(CurDAG
));
5271 LLVM_DEBUG(dbgs() << "\nNew: ");
5272 LLVM_DEBUG(Res
.getNode()->dump(CurDAG
));
5273 LLVM_DEBUG(dbgs() << "\n");
5275 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), Res
);
5281 CurDAG
->RemoveDeadNodes();
5284 /// PostprocessISelDAG - Perform some late peephole optimizations
5285 /// on the DAG representation.
// NOTE(review): statements between the -O0 check and the call below appear to
// be missing from this copy (original lines 5289-5292); restore from upstream.
5286 void PPCDAGToDAGISel::PostprocessISelDAG() {
5287 // Skip peepholes at -O0.
5288 if (TM
.getOptLevel() == CodeGenOpt::None
)
// Strip redundant i32->i64 zero-extension patterns (see PeepholePPC64ZExt).
5293 PeepholePPC64ZExt();
5296 // Check if all users of this node will become isel where the second operand
5297 // is the constant zero. If this is so, and if we can negate the condition,
5298 // then we can flip the true and false operands. This will allow the zero to
5299 // be folded with the isel so that we don't need to materialize a register
// NOTE(review): this copy is a line-shattered extraction and interior source
// lines (including the early-exit returns) are missing; restore from upstream
// before making any code change.
//
// Checks every user of N: each must be a SELECT_I4/SELECT_I8 whose
// false-value operand is an LI/LI8 materializing the constant zero.
5301 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode
*N
) {
5302 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
5305 if (!User
->isMachineOpcode())
5307 if (User
->getMachineOpcode() != PPC::SELECT_I4
&&
5308 User
->getMachineOpcode() != PPC::SELECT_I8
)
// The select's false-value operand (operand 2) must be an LI/LI8 of zero.
5311 SDNode
*Op2
= User
->getOperand(2).getNode();
5312 if (!Op2
->isMachineOpcode())
5315 if (Op2
->getMachineOpcode() != PPC::LI
&&
5316 Op2
->getMachineOpcode() != PPC::LI8
)
5319 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op2
->getOperand(0));
5323 if (!C
->isNullValue())
// NOTE(review): this copy is a line-shattered extraction and interior source
// lines are missing (see the gaps in the embedded original line numbers).
// Restore this function from upstream before making any code change.
//
// Rebuilds every (select) user of N with its true/false operands (operands 1
// and 2) swapped, then replaces the old users with the swapped nodes.
5330 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode
*N
) {
5331 SmallVector
<SDNode
*, 4> ToReplace
;
5332 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
5335 assert((User
->getMachineOpcode() == PPC::SELECT_I4
||
5336 User
->getMachineOpcode() == PPC::SELECT_I8
) &&
5337 "Must have all select users");
5338 ToReplace
.push_back(User
);
// Users were collected first; now build the swapped replacements.
5341 for (SmallVector
<SDNode
*, 4>::iterator UI
= ToReplace
.begin(),
5342 UE
= ToReplace
.end(); UI
!= UE
; ++UI
) {
5345 CurDAG
->getMachineNode(User
->getMachineOpcode(), SDLoc(User
),
5346 User
->getValueType(0), User
->getOperand(0),
5347 User
->getOperand(2),
5348 User
->getOperand(1));
5350 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
5351 LLVM_DEBUG(User
->dump(CurDAG
));
5352 LLVM_DEBUG(dbgs() << "\nNew: ");
5353 LLVM_DEBUG(ResNode
->dump(CurDAG
));
5354 LLVM_DEBUG(dbgs() << "\n");
5356 ReplaceUses(User
, ResNode
);
// NOTE(review): this copy is a line-shattered extraction; entire case labels,
// operand lines, and control-flow lines are missing (see the gaps in the
// embedded original line numbers). Restore from upstream before editing.
//
// Fixpoint peephole over CR (condition-register) logical machine nodes:
// folds constant inputs (CRSET/CRUNSET) and self-inversions (crnor x,x) into
// simpler crand/crnand/cror/crxor/crnor/creqv/crandc/crorc forms, and pairs
// with AllUsersSelectZero/SwapAllSelectUsers to fold inverted results into
// select users. Loops while IsModified (see the trailing do/while).
5360 void PPCDAGToDAGISel::PeepholeCROps() {
5364 for (SDNode
&Node
: CurDAG
->allnodes()) {
5365 MachineSDNode
*MachineNode
= dyn_cast
<MachineSDNode
>(&Node
);
5366 if (!MachineNode
|| MachineNode
->use_empty())
5368 SDNode
*ResNode
= MachineNode
;
// Classification of each CR input: constant-true, constant-false, or
// inverted (crnor x,x); used by the per-opcode folds below.
5370 bool Op1Set
= false, Op1Unset
= false,
5372 Op2Set
= false, Op2Unset
= false,
5375 unsigned Opcode
= MachineNode
->getMachineOpcode();
5386 SDValue Op
= MachineNode
->getOperand(1);
5387 if (Op
.isMachineOpcode()) {
5388 if (Op
.getMachineOpcode() == PPC::CRSET
)
5390 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
5392 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
5393 Op
.getOperand(0) == Op
.getOperand(1))
5400 case PPC::SELECT_I4
:
5401 case PPC::SELECT_I8
:
5402 case PPC::SELECT_F4
:
5403 case PPC::SELECT_F8
:
5404 case PPC::SELECT_QFRC
:
5405 case PPC::SELECT_QSRC
:
5406 case PPC::SELECT_QBRC
:
5407 case PPC::SELECT_SPE
:
5408 case PPC::SELECT_SPE4
:
5409 case PPC::SELECT_VRRC
:
5410 case PPC::SELECT_VSFRC
:
5411 case PPC::SELECT_VSSRC
:
5412 case PPC::SELECT_VSRC
: {
5413 SDValue Op
= MachineNode
->getOperand(0);
5414 if (Op
.isMachineOpcode()) {
5415 if (Op
.getMachineOpcode() == PPC::CRSET
)
5417 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
5419 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
5420 Op
.getOperand(0) == Op
.getOperand(1))
// Presumably set when the replacement computes the inverted value; the
// AllUsersSelectZero branches below pair with the SwapAllSelectUsers call
// near the end — the assigning lines are in the dropped source; verify.
5427 bool SelectSwap
= false;
5431 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5433 ResNode
= MachineNode
->getOperand(0).getNode();
5436 ResNode
= MachineNode
->getOperand(1).getNode();
5439 ResNode
= MachineNode
->getOperand(0).getNode();
5440 else if (Op1Unset
|| Op2Unset
)
5441 // x & 0 = 0 & y = 0
5442 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5445 // ~x & y = andc(y, x)
5446 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5447 MVT::i1
, MachineNode
->getOperand(1),
5448 MachineNode
->getOperand(0).
5451 // x & ~y = andc(x, y)
5452 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5453 MVT::i1
, MachineNode
->getOperand(0),
5454 MachineNode
->getOperand(1).
5456 else if (AllUsersSelectZero(MachineNode
)) {
5457 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
5458 MVT::i1
, MachineNode
->getOperand(0),
5459 MachineNode
->getOperand(1));
5464 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5465 // nand(x, x) -> nor(x, x)
5466 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5467 MVT::i1
, MachineNode
->getOperand(0),
5468 MachineNode
->getOperand(0));
5470 // nand(1, y) -> nor(y, y)
5471 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5472 MVT::i1
, MachineNode
->getOperand(1),
5473 MachineNode
->getOperand(1));
5475 // nand(x, 1) -> nor(x, x)
5476 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5477 MVT::i1
, MachineNode
->getOperand(0),
5478 MachineNode
->getOperand(0));
5479 else if (Op1Unset
|| Op2Unset
)
5480 // nand(x, 0) = nand(0, y) = 1
5481 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5484 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5485 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5486 MVT::i1
, MachineNode
->getOperand(0).
5488 MachineNode
->getOperand(1));
5490 // nand(x, ~y) = ~x | y = orc(y, x)
5491 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5492 MVT::i1
, MachineNode
->getOperand(1).
5494 MachineNode
->getOperand(0));
5495 else if (AllUsersSelectZero(MachineNode
)) {
5496 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
5497 MVT::i1
, MachineNode
->getOperand(0),
5498 MachineNode
->getOperand(1));
5503 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5505 ResNode
= MachineNode
->getOperand(0).getNode();
5506 else if (Op1Set
|| Op2Set
)
5507 // x | 1 = 1 | y = 1
5508 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5512 ResNode
= MachineNode
->getOperand(1).getNode();
5515 ResNode
= MachineNode
->getOperand(0).getNode();
5517 // ~x | y = orc(y, x)
5518 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5519 MVT::i1
, MachineNode
->getOperand(1),
5520 MachineNode
->getOperand(0).
5523 // x | ~y = orc(x, y)
5524 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5525 MVT::i1
, MachineNode
->getOperand(0),
5526 MachineNode
->getOperand(1).
5528 else if (AllUsersSelectZero(MachineNode
)) {
5529 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5530 MVT::i1
, MachineNode
->getOperand(0),
5531 MachineNode
->getOperand(1));
5536 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5538 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5541 // xor(1, y) -> nor(y, y)
5542 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5543 MVT::i1
, MachineNode
->getOperand(1),
5544 MachineNode
->getOperand(1));
5546 // xor(x, 1) -> nor(x, x)
5547 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5548 MVT::i1
, MachineNode
->getOperand(0),
5549 MachineNode
->getOperand(0));
5552 ResNode
= MachineNode
->getOperand(1).getNode();
5555 ResNode
= MachineNode
->getOperand(0).getNode();
5557 // xor(~x, y) = eqv(x, y)
5558 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5559 MVT::i1
, MachineNode
->getOperand(0).
5561 MachineNode
->getOperand(1));
5563 // xor(x, ~y) = eqv(x, y)
5564 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5565 MVT::i1
, MachineNode
->getOperand(0),
5566 MachineNode
->getOperand(1).
5568 else if (AllUsersSelectZero(MachineNode
)) {
5569 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5570 MVT::i1
, MachineNode
->getOperand(0),
5571 MachineNode
->getOperand(1));
5576 if (Op1Set
|| Op2Set
)
5578 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5581 // nor(0, y) = ~y -> nor(y, y)
5582 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5583 MVT::i1
, MachineNode
->getOperand(1),
5584 MachineNode
->getOperand(1));
5587 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5588 MVT::i1
, MachineNode
->getOperand(0),
5589 MachineNode
->getOperand(0));
5591 // nor(~x, y) = andc(x, y)
5592 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5593 MVT::i1
, MachineNode
->getOperand(0).
5595 MachineNode
->getOperand(1));
5597 // nor(x, ~y) = andc(y, x)
5598 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5599 MVT::i1
, MachineNode
->getOperand(1).
5601 MachineNode
->getOperand(0));
5602 else if (AllUsersSelectZero(MachineNode
)) {
5603 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
5604 MVT::i1
, MachineNode
->getOperand(0),
5605 MachineNode
->getOperand(1));
5610 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5612 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5616 ResNode
= MachineNode
->getOperand(1).getNode();
5619 ResNode
= MachineNode
->getOperand(0).getNode();
5621 // eqv(0, y) = ~y -> nor(y, y)
5622 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5623 MVT::i1
, MachineNode
->getOperand(1),
5624 MachineNode
->getOperand(1));
5627 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5628 MVT::i1
, MachineNode
->getOperand(0),
5629 MachineNode
->getOperand(0));
5631 // eqv(~x, y) = xor(x, y)
5632 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5633 MVT::i1
, MachineNode
->getOperand(0).
5635 MachineNode
->getOperand(1));
5637 // eqv(x, ~y) = xor(x, y)
5638 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5639 MVT::i1
, MachineNode
->getOperand(0),
5640 MachineNode
->getOperand(1).
5642 else if (AllUsersSelectZero(MachineNode
)) {
5643 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5644 MVT::i1
, MachineNode
->getOperand(0),
5645 MachineNode
->getOperand(1));
5650 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5652 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5656 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5657 MVT::i1
, MachineNode
->getOperand(1),
5658 MachineNode
->getOperand(1));
5659 else if (Op1Unset
|| Op2Set
)
5660 // andc(0, y) = andc(x, 1) = 0
5661 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5665 ResNode
= MachineNode
->getOperand(0).getNode();
5667 // andc(~x, y) = ~(x | y) = nor(x, y)
5668 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5669 MVT::i1
, MachineNode
->getOperand(0).
5671 MachineNode
->getOperand(1));
5673 // andc(x, ~y) = x & y
5674 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
5675 MVT::i1
, MachineNode
->getOperand(0),
5676 MachineNode
->getOperand(1).
5678 else if (AllUsersSelectZero(MachineNode
)) {
5679 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5680 MVT::i1
, MachineNode
->getOperand(1),
5681 MachineNode
->getOperand(0));
5686 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5688 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5690 else if (Op1Set
|| Op2Unset
)
5691 // orc(1, y) = orc(x, 0) = 1
5692 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5696 ResNode
= MachineNode
->getOperand(0).getNode();
5699 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5700 MVT::i1
, MachineNode
->getOperand(1),
5701 MachineNode
->getOperand(1));
5703 // orc(~x, y) = ~(x & y) = nand(x, y)
5704 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
5705 MVT::i1
, MachineNode
->getOperand(0).
5707 MachineNode
->getOperand(1));
5709 // orc(x, ~y) = x | y
5710 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
5711 MVT::i1
, MachineNode
->getOperand(0),
5712 MachineNode
->getOperand(1).
5714 else if (AllUsersSelectZero(MachineNode
)) {
5715 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5716 MVT::i1
, MachineNode
->getOperand(1),
5717 MachineNode
->getOperand(0));
5721 case PPC::SELECT_I4
:
5722 case PPC::SELECT_I8
:
5723 case PPC::SELECT_F4
:
5724 case PPC::SELECT_F8
:
5725 case PPC::SELECT_QFRC
:
5726 case PPC::SELECT_QSRC
:
5727 case PPC::SELECT_QBRC
:
5728 case PPC::SELECT_SPE
:
5729 case PPC::SELECT_SPE4
:
5730 case PPC::SELECT_VRRC
:
5731 case PPC::SELECT_VSFRC
:
5732 case PPC::SELECT_VSSRC
:
5733 case PPC::SELECT_VSRC
:
5735 ResNode
= MachineNode
->getOperand(1).getNode();
5737 ResNode
= MachineNode
->getOperand(2).getNode();
5739 ResNode
= CurDAG
->getMachineNode(MachineNode
->getMachineOpcode(),
5741 MachineNode
->getValueType(0),
5742 MachineNode
->getOperand(0).
5744 MachineNode
->getOperand(2),
5745 MachineNode
->getOperand(1));
5750 ResNode
= CurDAG
->getMachineNode(Opcode
== PPC::BC
? PPC::BCn
:
5754 MachineNode
->getOperand(0).
5756 MachineNode
->getOperand(1),
5757 MachineNode
->getOperand(2));
5758 // FIXME: Handle Op1Set, Op1Unset here too.
5762 // If we're inverting this node because it is used only by selects that
5763 // we'd like to swap, then swap the selects before the node replacement.
5765 SwapAllSelectUsers(MachineNode
);
// If anything was simplified, log and splice in the replacement node.
5767 if (ResNode
!= MachineNode
) {
5768 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
5769 LLVM_DEBUG(MachineNode
->dump(CurDAG
));
5770 LLVM_DEBUG(dbgs() << "\nNew: ");
5771 LLVM_DEBUG(ResNode
->dump(CurDAG
));
5772 LLVM_DEBUG(dbgs() << "\n");
5774 ReplaceUses(MachineNode
, ResNode
);
5779 CurDAG
->RemoveDeadNodes();
5780 } while (IsModified
);
5783 // Gather the set of 32-bit operations that are known to have their
5784 // higher-order 32 bits zero, where ToPromote contains all such operations.
// NOTE(review): this copy is a line-shattered extraction and interior source
// lines (including the return statements) are missing; restore from upstream
// before making any code change.
//
// Recursively proves that Op32's high-order 32 bits are zero; on success,
// every node that must be promoted to its 64-bit form is added to ToPromote.
5785 static bool PeepholePPC64ZExtGather(SDValue Op32
,
5786 SmallPtrSetImpl
<SDNode
*> &ToPromote
) {
5787 if (!Op32
.isMachineOpcode())
5790 // First, check for the "frontier" instructions (those that will clear the
5791 // higher-order 32 bits).
5793 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
5794 // around. If it does not, then these instructions will clear the
5795 // higher-order bits.
5796 if ((Op32
.getMachineOpcode() == PPC::RLWINM
||
5797 Op32
.getMachineOpcode() == PPC::RLWNM
) &&
5798 Op32
.getConstantOperandVal(2) <= Op32
.getConstantOperandVal(3)) {
5799 ToPromote
.insert(Op32
.getNode());
5803 // SLW and SRW always clear the higher-order bits.
5804 if (Op32
.getMachineOpcode() == PPC::SLW
||
5805 Op32
.getMachineOpcode() == PPC::SRW
) {
5806 ToPromote
.insert(Op32
.getNode());
5810 // For LI and LIS, we need the immediate to be positive (so that it is not
5812 if (Op32
.getMachineOpcode() == PPC::LI
||
5813 Op32
.getMachineOpcode() == PPC::LIS
) {
5814 if (!isUInt
<15>(Op32
.getConstantOperandVal(0)))
5817 ToPromote
.insert(Op32
.getNode());
5821 // LHBRX and LWBRX always clear the higher-order bits.
5822 if (Op32
.getMachineOpcode() == PPC::LHBRX
||
5823 Op32
.getMachineOpcode() == PPC::LWBRX
) {
5824 ToPromote
.insert(Op32
.getNode());
5828 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
5829 if (Op32
.getMachineOpcode() == PPC::CNTLZW
||
5830 Op32
.getMachineOpcode() == PPC::CNTTZW
) {
5831 ToPromote
.insert(Op32
.getNode());
5835 // Next, check for those instructions we can look through.
5837 // Assuming the mask does not wrap around, then the higher-order bits are
5838 // taken directly from the first operand.
5839 if (Op32
.getMachineOpcode() == PPC::RLWIMI
&&
5840 Op32
.getConstantOperandVal(3) <= Op32
.getConstantOperandVal(4)) {
5841 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
5842 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
5845 ToPromote
.insert(Op32
.getNode());
5846 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
5850 // For OR, the higher-order bits are zero if that is true for both operands.
5851 // For SELECT_I4, the same is true (but the relevant operand numbers are
5853 if (Op32
.getMachineOpcode() == PPC::OR
||
5854 Op32
.getMachineOpcode() == PPC::SELECT_I4
) {
5855 unsigned B
= Op32
.getMachineOpcode() == PPC::SELECT_I4
? 1 : 0;
5856 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
5857 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+0), ToPromote1
))
5859 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+1), ToPromote1
))
5862 ToPromote
.insert(Op32
.getNode());
5863 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
5867 // For ORI and ORIS, we need the higher-order bits of the first operand to be
5868 // zero, and also for the constant to be positive (so that it is not sign
5870 if (Op32
.getMachineOpcode() == PPC::ORI
||
5871 Op32
.getMachineOpcode() == PPC::ORIS
) {
5872 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
5873 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
5875 if (!isUInt
<15>(Op32
.getConstantOperandVal(1)))
5878 ToPromote
.insert(Op32
.getNode());
5879 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
5883 // The higher-order bits of AND are zero if that is true for at least one of
5885 if (Op32
.getMachineOpcode() == PPC::AND
) {
5886 SmallPtrSet
<SDNode
*, 16> ToPromote1
, ToPromote2
;
5888 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
5890 PeepholePPC64ZExtGather(Op32
.getOperand(1), ToPromote2
);
5891 if (!Op0OK
&& !Op1OK
)
5894 ToPromote
.insert(Op32
.getNode());
5897 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
5900 ToPromote
.insert(ToPromote2
.begin(), ToPromote2
.end());
5905 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
5906 // of the first operand, or if the second operand is positive (so that it is
5907 // not sign extended).
5908 if (Op32
.getMachineOpcode() == PPC::ANDIo
||
5909 Op32
.getMachineOpcode() == PPC::ANDISo
) {
5910 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
5912 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
5913 bool Op1OK
= isUInt
<15>(Op32
.getConstantOperandVal(1));
5914 if (!Op0OK
&& !Op1OK
)
5917 ToPromote
.insert(Op32
.getNode());
5920 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
5928 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
5929 if (!PPCSubTarget
->isPPC64())
5932 // When we zero-extend from i32 to i64, we use a pattern like this:
5933 // def : Pat<(i64 (zext i32:$in)),
5934 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
5936 // There are several 32-bit shift/rotate instructions, however, that will
5937 // clear the higher-order bits of their output, rendering the RLDICL
5938 // unnecessary. When that happens, we remove it here, and redefine the
5939 // relevant 32-bit operation to be a 64-bit operation.
5941 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
5943 bool MadeChange
= false;
5944 while (Position
!= CurDAG
->allnodes_begin()) {
5945 SDNode
*N
= &*--Position
;
5946 // Skip dead nodes and any non-machine opcodes.
5947 if (N
->use_empty() || !N
->isMachineOpcode())
5950 if (N
->getMachineOpcode() != PPC::RLDICL
)
5953 if (N
->getConstantOperandVal(1) != 0 ||
5954 N
->getConstantOperandVal(2) != 32)
5957 SDValue ISR
= N
->getOperand(0);
5958 if (!ISR
.isMachineOpcode() ||
5959 ISR
.getMachineOpcode() != TargetOpcode::INSERT_SUBREG
)
5962 if (!ISR
.hasOneUse())
5965 if (ISR
.getConstantOperandVal(2) != PPC::sub_32
)
5968 SDValue IDef
= ISR
.getOperand(0);
5969 if (!IDef
.isMachineOpcode() ||
5970 IDef
.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF
)
5973 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
5974 // can get rid of it.
5976 SDValue Op32
= ISR
->getOperand(1);
5977 if (!Op32
.isMachineOpcode())
5980 // There are some 32-bit instructions that always clear the high-order 32
5981 // bits, there are also some instructions (like AND) that we can look
5983 SmallPtrSet
<SDNode
*, 16> ToPromote
;
5984 if (!PeepholePPC64ZExtGather(Op32
, ToPromote
))
5987 // If the ToPromote set contains nodes that have uses outside of the set
5988 // (except for the original INSERT_SUBREG), then abort the transformation.
5989 bool OutsideUse
= false;
5990 for (SDNode
*PN
: ToPromote
) {
5991 for (SDNode
*UN
: PN
->uses()) {
5992 if (!ToPromote
.count(UN
) && UN
!= ISR
.getNode()) {
6006 // We now know that this zero extension can be removed by promoting to
6007 // nodes in ToPromote to 64-bit operations, where for operations in the
6008 // frontier of the set, we need to insert INSERT_SUBREGs for their
6010 for (SDNode
*PN
: ToPromote
) {
6012 switch (PN
->getMachineOpcode()) {
6014 llvm_unreachable("Don't know the 64-bit variant of this instruction");
6015 case PPC::RLWINM
: NewOpcode
= PPC::RLWINM8
; break;
6016 case PPC::RLWNM
: NewOpcode
= PPC::RLWNM8
; break;
6017 case PPC::SLW
: NewOpcode
= PPC::SLW8
; break;
6018 case PPC::SRW
: NewOpcode
= PPC::SRW8
; break;
6019 case PPC::LI
: NewOpcode
= PPC::LI8
; break;
6020 case PPC::LIS
: NewOpcode
= PPC::LIS8
; break;
6021 case PPC::LHBRX
: NewOpcode
= PPC::LHBRX8
; break;
6022 case PPC::LWBRX
: NewOpcode
= PPC::LWBRX8
; break;
6023 case PPC::CNTLZW
: NewOpcode
= PPC::CNTLZW8
; break;
6024 case PPC::CNTTZW
: NewOpcode
= PPC::CNTTZW8
; break;
6025 case PPC::RLWIMI
: NewOpcode
= PPC::RLWIMI8
; break;
6026 case PPC::OR
: NewOpcode
= PPC::OR8
; break;
6027 case PPC::SELECT_I4
: NewOpcode
= PPC::SELECT_I8
; break;
6028 case PPC::ORI
: NewOpcode
= PPC::ORI8
; break;
6029 case PPC::ORIS
: NewOpcode
= PPC::ORIS8
; break;
6030 case PPC::AND
: NewOpcode
= PPC::AND8
; break;
6031 case PPC::ANDIo
: NewOpcode
= PPC::ANDIo8
; break;
6032 case PPC::ANDISo
: NewOpcode
= PPC::ANDISo8
; break;
6035 // Note: During the replacement process, the nodes will be in an
6036 // inconsistent state (some instructions will have operands with values
6037 // of the wrong type). Once done, however, everything should be right
6040 SmallVector
<SDValue
, 4> Ops
;
6041 for (const SDValue
&V
: PN
->ops()) {
6042 if (!ToPromote
.count(V
.getNode()) && V
.getValueType() == MVT::i32
&&
6043 !isa
<ConstantSDNode
>(V
)) {
6044 SDValue ReplOpOps
[] = { ISR
.getOperand(0), V
, ISR
.getOperand(2) };
6046 CurDAG
->getMachineNode(TargetOpcode::INSERT_SUBREG
, SDLoc(V
),
6047 ISR
.getNode()->getVTList(), ReplOpOps
);
6048 Ops
.push_back(SDValue(ReplOp
, 0));
6054 // Because all to-be-promoted nodes only have users that are other
6055 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
6056 // the i32 result value type with i64.
6058 SmallVector
<EVT
, 2> NewVTs
;
6059 SDVTList VTs
= PN
->getVTList();
6060 for (unsigned i
= 0, ie
= VTs
.NumVTs
; i
!= ie
; ++i
)
6061 if (VTs
.VTs
[i
] == MVT::i32
)
6062 NewVTs
.push_back(MVT::i64
);
6064 NewVTs
.push_back(VTs
.VTs
[i
]);
6066 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
6067 LLVM_DEBUG(PN
->dump(CurDAG
));
6069 CurDAG
->SelectNodeTo(PN
, NewOpcode
, CurDAG
->getVTList(NewVTs
), Ops
);
6071 LLVM_DEBUG(dbgs() << "\nNew: ");
6072 LLVM_DEBUG(PN
->dump(CurDAG
));
6073 LLVM_DEBUG(dbgs() << "\n");
6076 // Now we replace the original zero extend and its associated INSERT_SUBREG
6077 // with the value feeding the INSERT_SUBREG (which has now been promoted to
6080 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
6081 LLVM_DEBUG(N
->dump(CurDAG
));
6082 LLVM_DEBUG(dbgs() << "\nNew: ");
6083 LLVM_DEBUG(Op32
.getNode()->dump(CurDAG
));
6084 LLVM_DEBUG(dbgs() << "\n");
6086 ReplaceUses(N
, Op32
.getNode());
6090 CurDAG
->RemoveDeadNodes();
6093 void PPCDAGToDAGISel::PeepholePPC64() {
6094 // These optimizations are currently supported only for 64-bit SVR4.
6095 if (PPCSubTarget
->isDarwin() || !PPCSubTarget
->isPPC64())
6098 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
6100 while (Position
!= CurDAG
->allnodes_begin()) {
6101 SDNode
*N
= &*--Position
;
6102 // Skip dead nodes and any non-machine opcodes.
6103 if (N
->use_empty() || !N
->isMachineOpcode())
6107 unsigned StorageOpcode
= N
->getMachineOpcode();
6108 bool RequiresMod4Offset
= false;
6110 switch (StorageOpcode
) {
6115 case PPC::DFLOADf64
:
6116 case PPC::DFLOADf32
:
6117 RequiresMod4Offset
= true;
6133 case PPC::DFSTOREf64
:
6134 case PPC::DFSTOREf32
:
6135 RequiresMod4Offset
= true;
6149 // If this is a load or store with a zero offset, or within the alignment,
6150 // we may be able to fold an add-immediate into the memory operation.
6151 // The check against alignment is below, as it can't occur until we check
6152 // the arguments to N
6153 if (!isa
<ConstantSDNode
>(N
->getOperand(FirstOp
)))
6156 SDValue Base
= N
->getOperand(FirstOp
+ 1);
6157 if (!Base
.isMachineOpcode())
6161 bool ReplaceFlags
= true;
6163 // When the feeding operation is an add-immediate of some sort,
6164 // determine whether we need to add relocation information to the
6165 // target flags on the immediate operand when we fold it into the
6166 // load instruction.
6168 // For something like ADDItocL, the relocation information is
6169 // inferred from the opcode; when we process it in the AsmPrinter,
6170 // we add the necessary relocation there. A load, though, can receive
6171 // relocation from various flavors of ADDIxxx, so we need to carry
6172 // the relocation information in the target flags.
6173 switch (Base
.getMachineOpcode()) {
6178 // In some cases (such as TLS) the relocation information
6179 // is already in place on the operand, so copying the operand
6181 ReplaceFlags
= false;
6182 // For these cases, the immediate may not be divisible by 4, in
6183 // which case the fold is illegal for DS-form instructions. (The
6184 // other cases provide aligned addresses and are always safe.)
6185 if (RequiresMod4Offset
&&
6186 (!isa
<ConstantSDNode
>(Base
.getOperand(1)) ||
6187 Base
.getConstantOperandVal(1) % 4 != 0))
6190 case PPC::ADDIdtprelL
:
6191 Flags
= PPCII::MO_DTPREL_LO
;
6193 case PPC::ADDItlsldL
:
6194 Flags
= PPCII::MO_TLSLD_LO
;
6197 Flags
= PPCII::MO_TOC_LO
;
6201 SDValue ImmOpnd
= Base
.getOperand(1);
6203 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
6204 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
6205 // we might have needed different @ha relocation values for the offset
6207 int MaxDisplacement
= 7;
6208 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
6209 const GlobalValue
*GV
= GA
->getGlobal();
6210 MaxDisplacement
= std::min((int) GV
->getAlignment() - 1, MaxDisplacement
);
6213 bool UpdateHBase
= false;
6214 SDValue HBase
= Base
.getOperand(0);
6216 int Offset
= N
->getConstantOperandVal(FirstOp
);
6218 if (Offset
< 0 || Offset
> MaxDisplacement
) {
6219 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
6220 // one use, then we can do this for any offset, we just need to also
6221 // update the offset (i.e. the symbol addend) on the addis also.
6222 if (Base
.getMachineOpcode() != PPC::ADDItocL
)
6225 if (!HBase
.isMachineOpcode() ||
6226 HBase
.getMachineOpcode() != PPC::ADDIStocHA
)
6229 if (!Base
.hasOneUse() || !HBase
.hasOneUse())
6232 SDValue HImmOpnd
= HBase
.getOperand(1);
6233 if (HImmOpnd
!= ImmOpnd
)
6239 // If we're directly folding the addend from an addi instruction, then:
6240 // 1. In general, the offset on the memory access must be zero.
6241 // 2. If the addend is a constant, then it can be combined with a
6242 // non-zero offset, but only if the result meets the encoding
6244 if (auto *C
= dyn_cast
<ConstantSDNode
>(ImmOpnd
)) {
6245 Offset
+= C
->getSExtValue();
6247 if (RequiresMod4Offset
&& (Offset
% 4) != 0)
6250 if (!isInt
<16>(Offset
))
6253 ImmOpnd
= CurDAG
->getTargetConstant(Offset
, SDLoc(ImmOpnd
),
6254 ImmOpnd
.getValueType());
6255 } else if (Offset
!= 0) {
6260 // We found an opportunity. Reverse the operands from the add
6261 // immediate and substitute them into the load or store. If
6262 // needed, update the target flags for the immediate operand to
6263 // reflect the necessary relocation information.
6264 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
6265 LLVM_DEBUG(Base
->dump(CurDAG
));
6266 LLVM_DEBUG(dbgs() << "\nN: ");
6267 LLVM_DEBUG(N
->dump(CurDAG
));
6268 LLVM_DEBUG(dbgs() << "\n");
6270 // If the relocation information isn't already present on the
6271 // immediate operand, add it now.
6273 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
6275 const GlobalValue
*GV
= GA
->getGlobal();
6276 // We can't perform this optimization for data whose alignment
6277 // is insufficient for the instruction encoding.
6278 if (GV
->getAlignment() < 4 &&
6279 (RequiresMod4Offset
|| (Offset
% 4) != 0)) {
6280 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
6283 ImmOpnd
= CurDAG
->getTargetGlobalAddress(GV
, dl
, MVT::i64
, Offset
, Flags
);
6284 } else if (ConstantPoolSDNode
*CP
=
6285 dyn_cast
<ConstantPoolSDNode
>(ImmOpnd
)) {
6286 const Constant
*C
= CP
->getConstVal();
6287 ImmOpnd
= CurDAG
->getTargetConstantPool(C
, MVT::i64
,
6293 if (FirstOp
== 1) // Store
6294 (void)CurDAG
->UpdateNodeOperands(N
, N
->getOperand(0), ImmOpnd
,
6295 Base
.getOperand(0), N
->getOperand(3));
6297 (void)CurDAG
->UpdateNodeOperands(N
, ImmOpnd
, Base
.getOperand(0),
6301 (void)CurDAG
->UpdateNodeOperands(HBase
.getNode(), HBase
.getOperand(0),
6304 // The add-immediate may now be dead, in which case remove it.
6305 if (Base
.getNode()->use_empty())
6306 CurDAG
->RemoveDeadNode(Base
.getNode());
6310 /// createPPCISelDag - This pass converts a legalized DAG into a
6311 /// PowerPC-specific DAG, ready for instruction scheduling.
6313 FunctionPass
*llvm::createPPCISelDag(PPCTargetMachine
&TM
,
6314 CodeGenOpt::Level OptLevel
) {
6315 return new PPCDAGToDAGISel(TM
, OptLevel
);