1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines a pattern matching instruction selector for PowerPC,
10 // converting from a legalized dag to a PPC dag.
12 //===----------------------------------------------------------------------===//
14 #include "MCTargetDesc/PPCMCTargetDesc.h"
15 #include "MCTargetDesc/PPCPredicates.h"
17 #include "PPCISelLowering.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCSubtarget.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/BranchProbabilityInfo.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/SelectionDAG.h"
35 #include "llvm/CodeGen/SelectionDAGISel.h"
36 #include "llvm/CodeGen/SelectionDAGNodes.h"
37 #include "llvm/CodeGen/TargetInstrInfo.h"
38 #include "llvm/CodeGen/TargetRegisterInfo.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalValue.h"
44 #include "llvm/IR/InlineAsm.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/CodeGen.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/ErrorHandling.h"
53 #include "llvm/Support/KnownBits.h"
54 #include "llvm/Support/MachineValueType.h"
55 #include "llvm/Support/MathExtras.h"
56 #include "llvm/Support/raw_ostream.h"
69 #define DEBUG_TYPE "ppc-codegen"
71 STATISTIC(NumSextSetcc
,
72 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
73 STATISTIC(NumZextSetcc
,
74 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
75 STATISTIC(SignExtensionsAdded
,
76 "Number of sign extensions for compare inputs added.");
77 STATISTIC(ZeroExtensionsAdded
,
78 "Number of zero extensions for compare inputs added.");
79 STATISTIC(NumLogicOpsOnComparison
,
80 "Number of logical ops on i1 values calculated in GPR.");
81 STATISTIC(OmittedForNonExtendUses
,
82 "Number of compares not eliminated as they have non-extending uses.");
84 "Number of compares lowered to setb.");
86 // FIXME: Remove this once the bug has been fixed!
87 cl::opt
<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
88 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden
);
91 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
92 cl::desc("use aggressive ppc isel for bit permutations"),
94 static cl::opt
<bool> BPermRewriterNoMasking(
95 "ppc-bit-perm-rewriter-stress-rotates",
96 cl::desc("stress rotate selection in aggressive ppc isel for "
100 static cl::opt
<bool> EnableBranchHint(
101 "ppc-use-branch-hint", cl::init(true),
102 cl::desc("Enable static hinting of branches on ppc"),
105 static cl::opt
<bool> EnableTLSOpt(
106 "ppc-tls-opt", cl::init(true),
107 cl::desc("Enable tls optimization peephole"),
110 enum ICmpInGPRType
{ ICGPR_All
, ICGPR_None
, ICGPR_I32
, ICGPR_I64
,
111 ICGPR_NonExtIn
, ICGPR_Zext
, ICGPR_Sext
, ICGPR_ZextI32
,
112 ICGPR_SextI32
, ICGPR_ZextI64
, ICGPR_SextI64
};
114 static cl::opt
<ICmpInGPRType
> CmpInGPR(
115 "ppc-gpr-icmps", cl::Hidden
, cl::init(ICGPR_All
),
116 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
117 cl::values(clEnumValN(ICGPR_None
, "none", "Do not modify integer comparisons."),
118 clEnumValN(ICGPR_All
, "all", "All possible int comparisons in GPRs."),
119 clEnumValN(ICGPR_I32
, "i32", "Only i32 comparisons in GPRs."),
120 clEnumValN(ICGPR_I64
, "i64", "Only i64 comparisons in GPRs."),
121 clEnumValN(ICGPR_NonExtIn
, "nonextin",
122 "Only comparisons where inputs don't need [sz]ext."),
123 clEnumValN(ICGPR_Zext
, "zext", "Only comparisons with zext result."),
124 clEnumValN(ICGPR_ZextI32
, "zexti32",
125 "Only i32 comparisons with zext result."),
126 clEnumValN(ICGPR_ZextI64
, "zexti64",
127 "Only i64 comparisons with zext result."),
128 clEnumValN(ICGPR_Sext
, "sext", "Only comparisons with sext result."),
129 clEnumValN(ICGPR_SextI32
, "sexti32",
130 "Only i32 comparisons with sext result."),
131 clEnumValN(ICGPR_SextI64
, "sexti64",
132 "Only i64 comparisons with sext result.")));
135 //===--------------------------------------------------------------------===//
136 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
137 /// instructions for SelectionDAG operations.
139 class PPCDAGToDAGISel
: public SelectionDAGISel
{
140 const PPCTargetMachine
&TM
;
141 const PPCSubtarget
*PPCSubTarget
;
142 const PPCTargetLowering
*PPCLowering
;
143 unsigned GlobalBaseReg
;
146 explicit PPCDAGToDAGISel(PPCTargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
147 : SelectionDAGISel(tm
, OptLevel
), TM(tm
) {}
149 bool runOnMachineFunction(MachineFunction
&MF
) override
{
150 // Make sure we re-emit a set of the global base reg if necessary
152 PPCSubTarget
= &MF
.getSubtarget
<PPCSubtarget
>();
153 PPCLowering
= PPCSubTarget
->getTargetLowering();
154 SelectionDAGISel::runOnMachineFunction(MF
);
156 if (!PPCSubTarget
->isSVR4ABI())
157 InsertVRSaveCode(MF
);
162 void PreprocessISelDAG() override
;
163 void PostprocessISelDAG() override
;
165 /// getI16Imm - Return a target constant with the specified value, of type i16.
167 inline SDValue
getI16Imm(unsigned Imm
, const SDLoc
&dl
) {
168 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i16
);
171 /// getI32Imm - Return a target constant with the specified value, of type i32.
173 inline SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
174 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
177 /// getI64Imm - Return a target constant with the specified value, of type i64.
179 inline SDValue
getI64Imm(uint64_t Imm
, const SDLoc
&dl
) {
180 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i64
);
183 /// getSmallIPtrImm - Return a target constant of pointer type.
184 inline SDValue
getSmallIPtrImm(unsigned Imm
, const SDLoc
&dl
) {
185 return CurDAG
->getTargetConstant(
186 Imm
, dl
, PPCLowering
->getPointerTy(CurDAG
->getDataLayout()));
189 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
190 /// rotate and mask opcode and mask operation.
191 static bool isRotateAndMask(SDNode
*N
, unsigned Mask
, bool isShiftMask
,
192 unsigned &SH
, unsigned &MB
, unsigned &ME
);
194 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
195 /// base register. Return the virtual register that holds this value.
196 SDNode
*getGlobalBaseReg();
198 void selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
= 0);
200 // Select - Convert the specified operand from a target-independent to a
201 // target-specific node if it hasn't already been changed.
202 void Select(SDNode
*N
) override
;
204 bool tryBitfieldInsert(SDNode
*N
);
205 bool tryBitPermutation(SDNode
*N
);
206 bool tryIntCompareInGPR(SDNode
*N
);
208 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
209 // an X-Form load instruction with the offset being a relocation coming from
210 // the PPCISD::ADD_TLS.
211 bool tryTLSXFormLoad(LoadSDNode
*N
);
212 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
213 // an X-Form store instruction with the offset being a relocation coming from
214 // the PPCISD::ADD_TLS.
215 bool tryTLSXFormStore(StoreSDNode
*N
);
216 /// SelectCC - Select a comparison of the specified values with the
217 /// specified condition code, returning the CR# of the expression.
218 SDValue
SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
221 /// SelectAddrImm - Returns true if the address N can be represented by
222 /// a base register plus a signed 16-bit displacement [r+imm].
223 bool SelectAddrImm(SDValue N
, SDValue
&Disp
,
225 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 0);
228 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
229 /// immediate field. Note that the operand at this point is already the
230 /// result of a prior SelectAddressRegImm call.
231 bool SelectAddrImmOffs(SDValue N
, SDValue
&Out
) const {
232 if (N
.getOpcode() == ISD::TargetConstant
||
233 N
.getOpcode() == ISD::TargetGlobalAddress
) {
241 /// SelectAddrIdx - Given the specified address, check to see if it can be
242 /// represented as an indexed [r+r] operation. Returns false if it can
243 /// be represented by [r+imm], which is preferred.
244 bool SelectAddrIdx(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
245 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
);
248 /// SelectAddrIdxOnly - Given the specified address, force it to be
249 /// represented as an indexed [r+r] operation.
250 bool SelectAddrIdxOnly(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
251 return PPCLowering
->SelectAddressRegRegOnly(N
, Base
, Index
, *CurDAG
);
254 /// SelectAddrImmX4 - Returns true if the address N can be represented by
255 /// a base register plus a signed 16-bit displacement that is a multiple of 4.
256 /// Suitable for use by STD and friends.
257 bool SelectAddrImmX4(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
258 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 4);
261 bool SelectAddrImmX16(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
262 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 16);
265 // Select an address into a single register.
266 bool SelectAddr(SDValue N
, SDValue
&Base
) {
271 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
272 /// inline asm expressions. It is always correct to compute the value into
273 /// a register. The case of adding a (possibly relocatable) constant to a
274 /// register can be improved, but it is wrong to substitute Reg+Reg for
275 /// Reg in an asm, because the load or store opcode would have to change.
276 bool SelectInlineAsmMemoryOperand(const SDValue
&Op
,
277 unsigned ConstraintID
,
278 std::vector
<SDValue
> &OutOps
) override
{
279 switch(ConstraintID
) {
281 errs() << "ConstraintID: " << ConstraintID
<< "\n";
282 llvm_unreachable("Unexpected asm memory constraint");
283 case InlineAsm::Constraint_es
:
284 case InlineAsm::Constraint_i
:
285 case InlineAsm::Constraint_m
:
286 case InlineAsm::Constraint_o
:
287 case InlineAsm::Constraint_Q
:
288 case InlineAsm::Constraint_Z
:
289 case InlineAsm::Constraint_Zy
:
290 // We need to make sure that this one operand does not end up in r0
291 // (because we might end up lowering this as 0(%op)).
292 const TargetRegisterInfo
*TRI
= PPCSubTarget
->getRegisterInfo();
293 const TargetRegisterClass
*TRC
= TRI
->getPointerRegClass(*MF
, /*Kind=*/1);
295 SDValue RC
= CurDAG
->getTargetConstant(TRC
->getID(), dl
, MVT::i32
);
297 SDValue(CurDAG
->getMachineNode(TargetOpcode::COPY_TO_REGCLASS
,
298 dl
, Op
.getValueType(),
301 OutOps
.push_back(NewOp
);
307 void InsertVRSaveCode(MachineFunction
&MF
);
309 StringRef
getPassName() const override
{
310 return "PowerPC DAG->DAG Pattern Instruction Selection";
313 // Include the pieces autogenerated from the target description.
314 #include "PPCGenDAGISel.inc"
317 bool trySETCC(SDNode
*N
);
319 void PeepholePPC64();
320 void PeepholePPC64ZExt();
321 void PeepholeCROps();
323 SDValue
combineToCMPB(SDNode
*N
);
324 void foldBoolExts(SDValue
&Res
, SDNode
*&N
);
326 bool AllUsersSelectZero(SDNode
*N
);
327 void SwapAllSelectUsers(SDNode
*N
);
329 bool isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const;
330 void transferMemOperands(SDNode
*N
, SDNode
*Result
);
333 } // end anonymous namespace
335 /// InsertVRSaveCode - Once the entire function has been instruction selected,
336 /// all virtual registers are created and all machine instructions are built,
337 /// check to see if we need to save/restore VRSAVE. If so, do it.
338 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction
&Fn
) {
339 // Check to see if this function uses vector registers, which means we have to
340 // save and restore the VRSAVE register and update it with the regs we use.
342 // In this case, there will be virtual registers of vector type created
343 // by the scheduler. Detect them now.
344 bool HasVectorVReg
= false;
345 for (unsigned i
= 0, e
= RegInfo
->getNumVirtRegs(); i
!= e
; ++i
) {
346 unsigned Reg
= TargetRegisterInfo::index2VirtReg(i
);
347 if (RegInfo
->getRegClass(Reg
) == &PPC::VRRCRegClass
) {
348 HasVectorVReg
= true;
352 if (!HasVectorVReg
) return; // nothing to do.
354 // If we have a vector register, we want to emit code into the entry and exit
355 // blocks to save and restore the VRSAVE register. We do this here (instead
356 // of marking all vector instructions as clobbering VRSAVE) for two reasons:
358 // 1. This (trivially) reduces the load on the register allocator, by not
359 // having to represent the live range of the VRSAVE register.
360 // 2. This (more significantly) allows us to create a temporary virtual
361 // register to hold the saved VRSAVE value, allowing this temporary to be
362 // register allocated, instead of forcing it to be spilled to the stack.
364 // Create two vregs - one to hold the VRSAVE register that is live-in to the
365 // function and one for the value after having bits or'd into it.
366 unsigned InVRSAVE
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
367 unsigned UpdatedVRSAVE
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
369 const TargetInstrInfo
&TII
= *PPCSubTarget
->getInstrInfo();
370 MachineBasicBlock
&EntryBB
= *Fn
.begin();
372 // Emit the following code into the entry block:
373 // InVRSAVE = MFVRSAVE
374 // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
375 // MTVRSAVE UpdatedVRSAVE
376 MachineBasicBlock::iterator IP
= EntryBB
.begin(); // Insert Point
377 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::MFVRSAVE
), InVRSAVE
);
378 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::UPDATE_VRSAVE
),
379 UpdatedVRSAVE
).addReg(InVRSAVE
);
380 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::MTVRSAVE
)).addReg(UpdatedVRSAVE
);
382 // Find all return blocks, outputting a restore in each epilog.
383 for (MachineFunction::iterator BB
= Fn
.begin(), E
= Fn
.end(); BB
!= E
; ++BB
) {
384 if (BB
->isReturnBlock()) {
385 IP
= BB
->end(); --IP
;
387 // Skip over all terminator instructions, which are part of the return
389 MachineBasicBlock::iterator I2
= IP
;
390 while (I2
!= BB
->begin() && (--I2
)->isTerminator())
393 // Emit: MTVRSAVE InVRSave
394 BuildMI(*BB
, IP
, dl
, TII
.get(PPC::MTVRSAVE
)).addReg(InVRSAVE
);
399 /// getGlobalBaseReg - Output the instructions required to put the
400 /// base address to use for accessing globals into a register.
402 SDNode
*PPCDAGToDAGISel::getGlobalBaseReg() {
403 if (!GlobalBaseReg
) {
404 const TargetInstrInfo
&TII
= *PPCSubTarget
->getInstrInfo();
405 // Insert the set of GlobalBaseReg into the first MBB of the function
406 MachineBasicBlock
&FirstMBB
= MF
->front();
407 MachineBasicBlock::iterator MBBI
= FirstMBB
.begin();
408 const Module
*M
= MF
->getFunction().getParent();
411 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) == MVT::i32
) {
412 if (PPCSubTarget
->isTargetELF()) {
413 GlobalBaseReg
= PPC::R30
;
414 if (M
->getPICLevel() == PICLevel::SmallPIC
) {
415 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MoveGOTtoLR
));
416 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
417 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
419 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
420 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
421 unsigned TempReg
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
422 BuildMI(FirstMBB
, MBBI
, dl
,
423 TII
.get(PPC::UpdateGBR
), GlobalBaseReg
)
424 .addReg(TempReg
, RegState::Define
).addReg(GlobalBaseReg
);
425 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
429 RegInfo
->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass
);
430 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
431 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
434 // We must ensure that this sequence is dominated by the prologue.
435 // FIXME: This is a bit of a big hammer since we don't get the benefits
436 // of shrink-wrapping whenever we emit this instruction. Considering
437 // this is used in any function where we emit a jump table, this may be
438 // a significant limitation. We should consider inserting this in the
439 // block where it is used and then commoning this sequence up if it
440 // appears in multiple places.
441 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
443 MF
->getInfo
<PPCFunctionInfo
>()->setShrinkWrapDisabled(true);
444 GlobalBaseReg
= RegInfo
->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass
);
445 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR8
));
446 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR8
), GlobalBaseReg
);
449 return CurDAG
->getRegister(GlobalBaseReg
,
450 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()))
454 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
455 /// operand. If so Imm will receive the 32-bit value.
456 static bool isInt32Immediate(SDNode
*N
, unsigned &Imm
) {
457 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i32
) {
458 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
464 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
465 /// operand. If so Imm will receive the 64-bit value.
466 static bool isInt64Immediate(SDNode
*N
, uint64_t &Imm
) {
467 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i64
) {
468 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
474 // isInt32Immediate - This method tests to see if the value is a 32-bit
475 // constant operand. If so Imm will receive the 32-bit value.
476 static bool isInt32Immediate(SDValue N
, unsigned &Imm
) {
477 return isInt32Immediate(N
.getNode(), Imm
);
480 /// isInt64Immediate - This method tests to see if the value is a 64-bit
481 /// constant operand. If so Imm will receive the 64-bit value.
482 static bool isInt64Immediate(SDValue N
, uint64_t &Imm
) {
483 return isInt64Immediate(N
.getNode(), Imm
);
486 static unsigned getBranchHint(unsigned PCC
, FunctionLoweringInfo
*FuncInfo
,
487 const SDValue
&DestMBB
) {
488 assert(isa
<BasicBlockSDNode
>(DestMBB
));
490 if (!FuncInfo
->BPI
) return PPC::BR_NO_HINT
;
492 const BasicBlock
*BB
= FuncInfo
->MBB
->getBasicBlock();
493 const Instruction
*BBTerm
= BB
->getTerminator();
495 if (BBTerm
->getNumSuccessors() != 2) return PPC::BR_NO_HINT
;
497 const BasicBlock
*TBB
= BBTerm
->getSuccessor(0);
498 const BasicBlock
*FBB
= BBTerm
->getSuccessor(1);
500 auto TProb
= FuncInfo
->BPI
->getEdgeProbability(BB
, TBB
);
501 auto FProb
= FuncInfo
->BPI
->getEdgeProbability(BB
, FBB
);
503 // We only want to handle cases which are easy to predict at static time, e.g.
504 // C++ throw statement, that is very likely not taken, or calling never
505 // returned function, e.g. stdlib exit(). So we set Threshold to filter
508 // Below is the LLVM branch weight table; we only want to handle cases 1 and 2.
510 // Case                   Taken:Nontaken  Example
511 // 1. Unreachable         1048575:1       C++ throw, stdlib exit(),
512 // 2. Invoke-terminating  1:1048575
513 // 3. Coldblock           4:64            __builtin_expect
514 // 4. Loop Branch         124:4           For loop
515 // 5. PH/ZH/FPH           20:12
516 const uint32_t Threshold
= 10000;
518 if (std::max(TProb
, FProb
) / Threshold
< std::min(TProb
, FProb
))
519 return PPC::BR_NO_HINT
;
521 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo
->Fn
->getName()
522 << "::" << BB
->getName() << "'\n"
523 << " -> " << TBB
->getName() << ": " << TProb
<< "\n"
524 << " -> " << FBB
->getName() << ": " << FProb
<< "\n");
526 const BasicBlockSDNode
*BBDN
= cast
<BasicBlockSDNode
>(DestMBB
);
528 // If Dest BasicBlock is the False-BasicBlock (FBB), swap branch probabilities,
529 // because we want 'TProb' to stand for the branch probability to Dest BasicBlock.
530 if (BBDN
->getBasicBlock()->getBasicBlock() != TBB
)
531 std::swap(TProb
, FProb
);
533 return (TProb
> FProb
) ? PPC::BR_TAKEN_HINT
: PPC::BR_NONTAKEN_HINT
;
536 // isOpcWithIntImmediate - This method tests to see if the node is a specific
537 // opcode and that it has an immediate integer right operand.
538 // If so Imm will receive the 32 bit value.
539 static bool isOpcWithIntImmediate(SDNode
*N
, unsigned Opc
, unsigned& Imm
) {
540 return N
->getOpcode() == Opc
541 && isInt32Immediate(N
->getOperand(1).getNode(), Imm
);
544 void PPCDAGToDAGISel::selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
) {
546 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
547 SDValue TFI
= CurDAG
->getTargetFrameIndex(FI
, N
->getValueType(0));
548 unsigned Opc
= N
->getValueType(0) == MVT::i32
? PPC::ADDI
: PPC::ADDI8
;
550 CurDAG
->SelectNodeTo(SN
, Opc
, N
->getValueType(0), TFI
,
551 getSmallIPtrImm(Offset
, dl
));
553 ReplaceNode(SN
, CurDAG
->getMachineNode(Opc
, dl
, N
->getValueType(0), TFI
,
554 getSmallIPtrImm(Offset
, dl
)));
557 bool PPCDAGToDAGISel::isRotateAndMask(SDNode
*N
, unsigned Mask
,
558 bool isShiftMask
, unsigned &SH
,
559 unsigned &MB
, unsigned &ME
) {
560 // Don't even go down this path for i64, since different logic will be
561 // necessary for rldicl/rldicr/rldimi.
562 if (N
->getValueType(0) != MVT::i32
)
566 unsigned Indeterminant
= ~0; // bit mask marking indeterminant results
567 unsigned Opcode
= N
->getOpcode();
568 if (N
->getNumOperands() != 2 ||
569 !isInt32Immediate(N
->getOperand(1).getNode(), Shift
) || (Shift
> 31))
572 if (Opcode
== ISD::SHL
) {
573 // apply shift left to mask if it comes first
574 if (isShiftMask
) Mask
= Mask
<< Shift
;
575 // determine which bits are made indeterminant by shift
576 Indeterminant
= ~(0xFFFFFFFFu
<< Shift
);
577 } else if (Opcode
== ISD::SRL
) {
578 // apply shift right to mask if it comes first
579 if (isShiftMask
) Mask
= Mask
>> Shift
;
580 // determine which bits are made indeterminant by shift
581 Indeterminant
= ~(0xFFFFFFFFu
>> Shift
);
582 // adjust for the left rotate
584 } else if (Opcode
== ISD::ROTL
) {
590 // if the mask doesn't intersect any Indeterminant bits
591 if (Mask
&& !(Mask
& Indeterminant
)) {
593 // make sure the mask is still a mask (wrap arounds may not be)
594 return isRunOfOnes(Mask
, MB
, ME
);
599 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode
*ST
) {
600 SDValue Base
= ST
->getBasePtr();
601 if (Base
.getOpcode() != PPCISD::ADD_TLS
)
603 SDValue Offset
= ST
->getOffset();
604 if (!Offset
.isUndef())
608 EVT MemVT
= ST
->getMemoryVT();
609 EVT RegVT
= ST
->getValue().getValueType();
612 switch (MemVT
.getSimpleVT().SimpleTy
) {
616 Opcode
= (RegVT
== MVT::i32
) ? PPC::STBXTLS_32
: PPC::STBXTLS
;
620 Opcode
= (RegVT
== MVT::i32
) ? PPC::STHXTLS_32
: PPC::STHXTLS
;
624 Opcode
= (RegVT
== MVT::i32
) ? PPC::STWXTLS_32
: PPC::STWXTLS
;
628 Opcode
= PPC::STDXTLS
;
632 SDValue Chain
= ST
->getChain();
633 SDVTList VTs
= ST
->getVTList();
634 SDValue Ops
[] = {ST
->getValue(), Base
.getOperand(0), Base
.getOperand(1),
636 SDNode
*MN
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
637 transferMemOperands(ST
, MN
);
642 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode
*LD
) {
643 SDValue Base
= LD
->getBasePtr();
644 if (Base
.getOpcode() != PPCISD::ADD_TLS
)
646 SDValue Offset
= LD
->getOffset();
647 if (!Offset
.isUndef())
651 EVT MemVT
= LD
->getMemoryVT();
652 EVT RegVT
= LD
->getValueType(0);
654 switch (MemVT
.getSimpleVT().SimpleTy
) {
658 Opcode
= (RegVT
== MVT::i32
) ? PPC::LBZXTLS_32
: PPC::LBZXTLS
;
662 Opcode
= (RegVT
== MVT::i32
) ? PPC::LHZXTLS_32
: PPC::LHZXTLS
;
666 Opcode
= (RegVT
== MVT::i32
) ? PPC::LWZXTLS_32
: PPC::LWZXTLS
;
670 Opcode
= PPC::LDXTLS
;
674 SDValue Chain
= LD
->getChain();
675 SDVTList VTs
= LD
->getVTList();
676 SDValue Ops
[] = {Base
.getOperand(0), Base
.getOperand(1), Chain
};
677 SDNode
*MN
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
678 transferMemOperands(LD
, MN
);
683 /// Turn an or of two masked values into the rotate left word immediate then
684 /// mask insert (rlwimi) instruction.
685 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode
*N
) {
686 SDValue Op0
= N
->getOperand(0);
687 SDValue Op1
= N
->getOperand(1);
690 KnownBits LKnown
= CurDAG
->computeKnownBits(Op0
);
691 KnownBits RKnown
= CurDAG
->computeKnownBits(Op1
);
693 unsigned TargetMask
= LKnown
.Zero
.getZExtValue();
694 unsigned InsertMask
= RKnown
.Zero
.getZExtValue();
696 if ((TargetMask
| InsertMask
) == 0xFFFFFFFF) {
697 unsigned Op0Opc
= Op0
.getOpcode();
698 unsigned Op1Opc
= Op1
.getOpcode();
699 unsigned Value
, SH
= 0;
700 TargetMask
= ~TargetMask
;
701 InsertMask
= ~InsertMask
;
703 // If the LHS has a foldable shift and the RHS does not, then swap it to the
704 // RHS so that we can fold the shift into the insert.
705 if (Op0Opc
== ISD::AND
&& Op1Opc
== ISD::AND
) {
706 if (Op0
.getOperand(0).getOpcode() == ISD::SHL
||
707 Op0
.getOperand(0).getOpcode() == ISD::SRL
) {
708 if (Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
709 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
711 std::swap(Op0Opc
, Op1Opc
);
712 std::swap(TargetMask
, InsertMask
);
715 } else if (Op0Opc
== ISD::SHL
|| Op0Opc
== ISD::SRL
) {
716 if (Op1Opc
== ISD::AND
&& Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
717 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
719 std::swap(Op0Opc
, Op1Opc
);
720 std::swap(TargetMask
, InsertMask
);
725 if (isRunOfOnes(InsertMask
, MB
, ME
)) {
726 if ((Op1Opc
== ISD::SHL
|| Op1Opc
== ISD::SRL
) &&
727 isInt32Immediate(Op1
.getOperand(1), Value
)) {
728 Op1
= Op1
.getOperand(0);
729 SH
= (Op1Opc
== ISD::SHL
) ? Value
: 32 - Value
;
731 if (Op1Opc
== ISD::AND
) {
732 // The AND mask might not be a constant, and we need to make sure that
733 // if we're going to fold the masking with the insert, all bits not
734 // known to be zero in the mask are known to be one.
735 KnownBits MKnown
= CurDAG
->computeKnownBits(Op1
.getOperand(1));
736 bool CanFoldMask
= InsertMask
== MKnown
.One
.getZExtValue();
738 unsigned SHOpc
= Op1
.getOperand(0).getOpcode();
739 if ((SHOpc
== ISD::SHL
|| SHOpc
== ISD::SRL
) && CanFoldMask
&&
740 isInt32Immediate(Op1
.getOperand(0).getOperand(1), Value
)) {
741 // Note that Value must be in range here (less than 32) because
742 // otherwise there would not be any bits set in InsertMask.
743 Op1
= Op1
.getOperand(0).getOperand(0);
744 SH
= (SHOpc
== ISD::SHL
) ? Value
: 32 - Value
;
749 SDValue Ops
[] = { Op0
, Op1
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
751 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
758 // Predict the number of instructions that would be generated by calling
760 static unsigned selectI64ImmInstrCountDirect(int64_t Imm
) {
761 // Assume no remaining bits.
762 unsigned Remainder
= 0;
763 // Assume no shift required.
766 // If it can't be represented as a 32 bit value.
767 if (!isInt
<32>(Imm
)) {
768 Shift
= countTrailingZeros
<uint64_t>(Imm
);
769 int64_t ImmSh
= static_cast<uint64_t>(Imm
) >> Shift
;
771 // If the shifted value fits 32 bits.
772 if (isInt
<32>(ImmSh
)) {
773 // Go with the shifted value.
776 // Still stuck with a 64 bit value.
783 // Intermediate operand.
786 // Handle first 32 bits.
787 unsigned Lo
= Imm
& 0xFFFF;
790 if (isInt
<16>(Imm
)) {
794 // Handle the Hi bits and Lo bits.
801 // If no shift, we're done.
802 if (!Shift
) return Result
;
804 // If Hi word == Lo word,
805 // we can use rldimi to insert the Lo word into Hi word.
806 if ((unsigned)(Imm
& 0xFFFFFFFF) == Remainder
) {
811 // Shift for next step if the upper 32-bits were not zero.
815 // Add in the last bits as required.
816 if ((Remainder
>> 16) & 0xFFFF)
818 if (Remainder
& 0xFFFF)
/// Rot64 - Rotate the 64-bit value Imm left by R bit positions.
///
/// The rotate amount is reduced modulo 64, so R == 0 (or any multiple of 64)
/// returns Imm unchanged instead of evaluating a shift by 64 bits, which is
/// undefined behavior for a 64-bit operand. For R in [1, 63] the result is
/// the usual (Imm << R) | (Imm >> (64 - R)).
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63; // Rotating by 64 is the identity, so only the low 6 bits matter.
  if (R == 0)
    return Imm; // Avoid the out-of-range shift `Imm >> 64`.
  return (Imm << R) | (Imm >> (64 - R));
}
828 static unsigned selectI64ImmInstrCount(int64_t Imm
) {
829 unsigned Count
= selectI64ImmInstrCountDirect(Imm
);
831 // If the instruction count is 1 or 2, we do not need further analysis
832 // since rotate + load constant requires at least 2 instructions.
836 for (unsigned r
= 1; r
< 63; ++r
) {
837 uint64_t RImm
= Rot64(Imm
, r
);
838 unsigned RCount
= selectI64ImmInstrCountDirect(RImm
) + 1;
839 Count
= std::min(Count
, RCount
);
841 // See comments in selectI64Imm for an explanation of the logic below.
842 unsigned LS
= findLastSet(RImm
);
846 uint64_t OnesMask
= -(int64_t) (UINT64_C(1) << (LS
+1));
847 uint64_t RImmWithOnes
= RImm
| OnesMask
;
849 RCount
= selectI64ImmInstrCountDirect(RImmWithOnes
) + 1;
850 Count
= std::min(Count
, RCount
);
856 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
857 // (above) needs to be kept in sync with this function.
858 static SDNode
*selectI64ImmDirect(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
860 // Assume no remaining bits.
861 unsigned Remainder
= 0;
862 // Assume no shift required.
865 // If it can't be represented as a 32 bit value.
866 if (!isInt
<32>(Imm
)) {
867 Shift
= countTrailingZeros
<uint64_t>(Imm
);
868 int64_t ImmSh
= static_cast<uint64_t>(Imm
) >> Shift
;
870 // If the shifted value fits 32 bits.
871 if (isInt
<32>(ImmSh
)) {
872 // Go with the shifted value.
875 // Still stuck with a 64 bit value.
882 // Intermediate operand.
885 // Handle first 32 bits.
886 unsigned Lo
= Imm
& 0xFFFF;
887 unsigned Hi
= (Imm
>> 16) & 0xFFFF;
889 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
890 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
894 if (isInt
<16>(Imm
)) {
895 uint64_t SextImm
= SignExtend64(Lo
, 16);
896 SDValue SDImm
= CurDAG
->getTargetConstant(SextImm
, dl
, MVT::i64
);
898 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, SDImm
);
900 // Handle the Hi bits.
901 unsigned OpC
= Hi
? PPC::LIS8
: PPC::LI8
;
902 Result
= CurDAG
->getMachineNode(OpC
, dl
, MVT::i64
, getI32Imm(Hi
));
904 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
905 SDValue(Result
, 0), getI32Imm(Lo
));
908 Result
= CurDAG
->getMachineNode(PPC::LIS8
, dl
, MVT::i64
, getI32Imm(Hi
));
911 // If no shift, we're done.
912 if (!Shift
) return Result
;
914 // If Hi word == Lo word,
915 // we can use rldimi to insert the Lo word into Hi word.
916 if ((unsigned)(Imm
& 0xFFFFFFFF) == Remainder
) {
918 { SDValue(Result
, 0), SDValue(Result
, 0), getI32Imm(Shift
), getI32Imm(0)};
919 return CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
);
922 // Shift for next step if the upper 32-bits were not zero.
924 Result
= CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
,
927 getI32Imm(63 - Shift
));
930 // Add in the last bits as required.
931 if ((Hi
= (Remainder
>> 16) & 0xFFFF)) {
932 Result
= CurDAG
->getMachineNode(PPC::ORIS8
, dl
, MVT::i64
,
933 SDValue(Result
, 0), getI32Imm(Hi
));
935 if ((Lo
= Remainder
& 0xFFFF)) {
936 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
937 SDValue(Result
, 0), getI32Imm(Lo
));
// Materializes the 64-bit immediate Imm as machine nodes and returns the
// resulting node. The direct sequence (selectI64ImmDirect) is compared, by
// instruction count, against sequences that build a rotated variant of Imm
// and then rotate it back with a final RLDICR.
// NOTE(review): several lines of this definition (the rest of the parameter
// list, the declarations of RMin/MatImm/MaskEnd and some guards) are not
// visible in this listing.
943 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
// Number of instructions needed to build Imm without any rotation.
945 unsigned Count
= selectI64ImmInstrCountDirect(Imm
);
947 // If the instruction count is 1 or 2, we do not need further analysis
948 // since rotate + load constant requires at least 2 instructions.
950 return selectI64ImmDirect(CurDAG
, dl
, Imm
);
// Try the rotation amounts 1..62, costing each rotated immediate.
957 for (unsigned r
= 1; r
< 63; ++r
) {
958 uint64_t RImm
= Rot64(Imm
, r
);
// The "+ 1" is presumably the extra rotate instruction mentioned above.
959 unsigned RCount
= selectI64ImmInstrCountDirect(RImm
) + 1;
960 if (RCount
< Count
) {
967 // If the immediate to generate has many trailing zeros, it might be
968 // worthwhile to generate a rotated value with too many leading ones
969 // (because that's free with li/lis's sign-extension semantics), and then
970 // mask them off after rotation.
972 unsigned LS
= findLastSet(RImm
);
973 // We're adding (63-LS) higher-order ones, and we expect to mask them off
974 // after performing the inverse rotation by (64-r). So we need that:
975 // 63-LS == 64-r => LS == r-1
// Set every bit above bit LS.
// NOTE(review): the shift by (LS+1) relies on LS < 63; presumably ensured by
// the LS == r-1 requirement described above (guard not visible here).
979 uint64_t OnesMask
= -(int64_t) (UINT64_C(1) << (LS
+1));
980 uint64_t RImmWithOnes
= RImm
| OnesMask
;
982 RCount
= selectI64ImmInstrCountDirect(RImmWithOnes
) + 1;
983 if (RCount
< Count
) {
986 MatImm
= RImmWithOnes
;
992 return selectI64ImmDirect(CurDAG
, dl
, Imm
);
// Local helper: wraps an unsigned value as an i32 target constant.
994 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
995 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
// Build the chosen rotated immediate, then rotate by (64 - RMin) and apply
// the mask end MaskEnd with RLDICR.
998 SDValue Val
= SDValue(selectI64ImmDirect(CurDAG
, dl
, MatImm
), 0);
999 return CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Val
,
1000 getI32Imm(64 - RMin
), getI32Imm(MaskEnd
));
// Walks every use of N and accumulates in MaxTruncation the widest bit width
// recorded for any use: the result width of a use, the memory width of a
// store, or one of the fixed widths 32/16/8. Returns MaxTruncation.
// NOTE(review): the switch/case labels and the early-exit statements that
// follow the guards below are not visible in this listing; the caller treats
// a zero result as "no truncation information".
1003 static unsigned allUsesTruncate(SelectionDAG
*CurDAG
, SDNode
*N
) {
1004 unsigned MaxTruncation
= 0;
1005 // Cannot use range-based for loop here as we need the actual use (i.e. we
1006 // need the operand number corresponding to the use). A range-based for
1007 // will unbox the use and provide an SDNode*.
1008 for (SDNode::use_iterator Use
= N
->use_begin(), UseEnd
= N
->use_end();
1009 Use
!= UseEnd
; ++Use
) {
// Use the machine opcode when the user is already selected, otherwise the
// generic ISD opcode.
1011 Use
->isMachineOpcode() ? Use
->getMachineOpcode() : Use
->getOpcode();
1015 if (Use
->isMachineOpcode())
// Record the width of the user's first result type.
1018 std::max(MaxTruncation
, Use
->getValueType(0).getSizeInBits());
1021 if (Use
->isMachineOpcode())
1023 StoreSDNode
*STN
= cast
<StoreSDNode
>(*Use
);
1024 unsigned MemVTSize
= STN
->getMemoryVT().getSizeInBits();
// A 64-bit store, or N appearing as anything other than operand 0, is
// checked separately here.
1025 if (MemVTSize
== 64 || Use
.getOperandNo() != 0)
1027 MaxTruncation
= std::max(MaxTruncation
, MemVTSize
);
// The three groups below check that N is operand 0 and record a fixed width
// of 32, 16 and 8 bits respectively.
1034 if (Use
.getOperandNo() != 0)
1036 MaxTruncation
= std::max(MaxTruncation
, 32u);
1042 if (Use
.getOperandNo() != 0)
1044 MaxTruncation
= std::max(MaxTruncation
, 16u);
1050 if (Use
.getOperandNo() != 0)
1052 MaxTruncation
= std::max(MaxTruncation
, 8u);
1056 return MaxTruncation
;
1059 // Select a 64-bit constant.
// Reads the constant from N (which is cast to ConstantSDNode). When
// allUsesTruncate reports a non-zero width, the constant is sign-extended
// from that width and, if it then fits in a signed 16-bit field, emitted as a
// single LI8. Otherwise the general selectI64Imm(CurDAG, dl, Imm) path is
// used.
1060 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, SDNode
*N
) {
1063 // Get 64 bit value.
1064 int64_t Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
1065 if (unsigned MinSize
= allUsesTruncate(CurDAG
, N
)) {
// Only the low MinSize bits matter to the users; sign-extend from there.
1066 uint64_t SextImm
= SignExtend64(Imm
, MinSize
);
1067 SDValue SDImm
= CurDAG
->getTargetConstant(SextImm
, dl
, MVT::i64
);
1068 if (isInt
<16>(SextImm
))
1069 return CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, SDImm
);
1071 return selectI64Imm(CurDAG
, dl
, Imm
);
1076 class BitPermutationSelector
{
// Describes where a single result bit comes from: an underlying SDValue V,
// a bit index Idx within it, and a kind K.
// NOTE(review): the member declarations and the head of the Kind enum are not
// visible in this listing.
1080 // The bit number in the value, using a convention where bit 0 is the
1081 // lowest-order bit.
1084 // ConstZero means a bit we need to mask off.
1085 // Variable is a bit that comes from an input variable.
1086 // VariableKnownToBeZero is also a bit that comes from an input variable,
1087 // but it is known to be already zero. So we do not need to mask it.
1091 VariableKnownToBeZero
// Bit I of value V, of kind K.
1094 ValueBit(SDValue V
, unsigned I
, Kind K
= Variable
)
1095 : V(V
), Idx(I
), K(K
) {}
// A bit with no underlying value: null SDValue and Idx == UINT32_MAX.
1096 ValueBit(Kind K
= Variable
)
1097 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX
), K(K
) {}
// True for both kinds of zero bit (constant zero and known-zero variable).
1099 bool isZero() const {
1100 return K
== ConstZero
|| K
== VariableKnownToBeZero
;
// True when the bit is one of the two variable kinds.
1103 bool hasValue() const {
1104 return K
== Variable
|| K
== VariableKnownToBeZero
;
// Accessor; asserts hasValue().
1107 SDValue
getValue() const {
1108 assert(hasValue() && "Cannot get the value of a constant bit");
// Accessor; asserts hasValue().
1112 unsigned getValueBitIndex() const {
1113 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1118 // A bit group has the same underlying value and the same rotate factor.
// The group covers the bit positions StartIdx..EndIdx.
// NOTE(review): some member declarations (V, RLAmt, Repl32, Repl32CR) are not
// visible in this listing; they are named in the constructor's initializer
// list below.
1122 unsigned StartIdx
, EndIdx
;
1124 // This rotation amount assumes that the lower 32 bits of the quantity are
1125 // replicated in the high 32 bits by the rotation operator (which is done
1126 // by rlwinm and friends in 64-bit mode).
1128 // Did converting to Repl32 == true change the rotation factor? If it did,
1129 // it decreased it by 32.
1131 // Was this group coalesced after setting Repl32 to true?
1132 bool Repl32Coalesced
;
// Creates a group for value V with rotation R covering [S, E]; all three
// Repl32-related flags start out false. Emits a debug trace of the group.
1134 BitGroup(SDValue V
, unsigned R
, unsigned S
, unsigned E
)
1135 : V(V
), RLAmt(R
), StartIdx(S
), EndIdx(E
), Repl32(false), Repl32CR(false),
1136 Repl32Coalesced(false) {
1137 LLVM_DEBUG(dbgs() << "\tbit group for " << V
.getNode() << " RLAmt = " << R
1138 << " [" << S
<< ", " << E
<< "]\n");
1142 // Information on each (Value, RLAmt) pair (like the number of groups
1143 // associated with each) used to choose the lowering method.
1144 struct ValueRotInfo
{
// RLAmt and FirstGroupStartIdx default to the maximum unsigned value,
// NumGroups to 0 and Repl32 to false.
1146 unsigned RLAmt
= std::numeric_limits
<unsigned>::max();
1147 unsigned NumGroups
= 0;
1148 unsigned FirstGroupStartIdx
= std::numeric_limits
<unsigned>::max();
1149 bool Repl32
= false;
1151 ValueRotInfo() = default;
1153 // For sorting (in reverse order) by NumGroups, and then by
1154 // FirstGroupStartIdx.
// The comparison keys are examined in this order: Repl32, NumGroups,
// whether RLAmt is zero, FirstGroupStartIdx.
// NOTE(review): the value returned by each branch is not visible in this
// listing.
1155 bool operator < (const ValueRotInfo
&Other
) const {
1156 // We need to sort so that the non-Repl32 come first because, when we're
1157 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1158 // masking operation.
1159 if (Repl32
< Other
.Repl32
)
1161 else if (Repl32
> Other
.Repl32
)
1163 else if (NumGroups
> Other
.NumGroups
)
1165 else if (NumGroups
< Other
.NumGroups
)
1167 else if (RLAmt
== 0 && Other
.RLAmt
!= 0)
1169 else if (RLAmt
!= 0 && Other
.RLAmt
== 0)
1171 else if (FirstGroupStartIdx
< Other
.FirstGroupStartIdx
)
// A memoized result: a bool flag paired with the per-bit description vector.
1177 using ValueBitsMemoizedValue
= std::pair
<bool, SmallVector
<ValueBit
, 64>>;
// Cache from SDValue to its heap-allocated memoized result.
1178 using ValueBitsMemoizer
=
1179 DenseMap
<SDValue
, std::unique_ptr
<ValueBitsMemoizedValue
>>;
// The cache instance consulted and filled by getValueBits.
1180 ValueBitsMemoizer Memoizer
;
1182 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1183 // The bool is true if something interesting was deduced, otherwise if we're
1184 // providing only a generic representation of V (or something else likewise
1185 // uninteresting for instruction selection) through the SmallVector.
//
// The result for V is stored in Memoizer; an existing entry is returned
// as-is. Otherwise a fresh entry of NumBits bits is created and filled in
// according to V's opcode, recursing into the operands where needed. The
// fallback at the end describes every bit i as plain "bit i of V".
// NOTE(review): most `case` labels and the `break`/closing lines of this
// switch are not visible in this listing.
1186 std::pair
<bool, SmallVector
<ValueBit
, 64> *> getValueBits(SDValue V
,
1188 auto &ValueEntry
= Memoizer
[V
];
1190 return std::make_pair(ValueEntry
->first
, &ValueEntry
->second
);
1191 ValueEntry
.reset(new ValueBitsMemoizedValue());
// Interesting and Bits refer into the heap-allocated entry (presumably the
// reason entries are held by unique_ptr: the recursive calls below may add
// entries to Memoizer).
1192 bool &Interesting
= ValueEntry
->first
;
1193 SmallVector
<ValueBit
, 64> &Bits
= ValueEntry
->second
;
1194 Bits
.resize(NumBits
);
1196 switch (V
.getOpcode()) {
// Rotate by a constant amount: result bit i takes the operand bit that is
// RotAmt positions lower, wrapping around NumBits.
1199 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1200 unsigned RotAmt
= V
.getConstantOperandVal(1);
1202 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1204 for (unsigned i
= 0; i
< NumBits
; ++i
)
1205 Bits
[i
] = LHSBits
[i
< RotAmt
? i
+ (NumBits
- RotAmt
) : i
- RotAmt
];
1207 return std::make_pair(Interesting
= true, &Bits
);
// Shift by a constant amount toward higher bit positions: the low ShiftAmt
// bits become constant zero.
1211 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1212 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1214 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1216 for (unsigned i
= ShiftAmt
; i
< NumBits
; ++i
)
1217 Bits
[i
] = LHSBits
[i
- ShiftAmt
];
1219 for (unsigned i
= 0; i
< ShiftAmt
; ++i
)
1220 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1222 return std::make_pair(Interesting
= true, &Bits
);
// Shift by a constant amount toward lower bit positions: the high ShiftAmt
// bits become constant zero.
// NOTE(review): NumBits - ShiftAmt is unsigned arithmetic; this assumes
// ShiftAmt <= NumBits - confirm against the callers.
1226 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1227 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1229 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1231 for (unsigned i
= 0; i
< NumBits
- ShiftAmt
; ++i
)
1232 Bits
[i
] = LHSBits
[i
+ ShiftAmt
];
1234 for (unsigned i
= NumBits
- ShiftAmt
; i
< NumBits
; ++i
)
1235 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1237 return std::make_pair(Interesting
= true, &Bits
);
// Masking with a constant: bits selected by Mask pass through; the others
// become zero.
1241 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1242 uint64_t Mask
= V
.getConstantOperandVal(1);
1244 const SmallVector
<ValueBit
, 64> *LHSBits
;
1245 // Mark this as interesting, only if the LHS was also interesting. This
1246 // prevents the overall procedure from matching a single immediate 'and'
1247 // (which is non-optimal because such an and might be folded with other
1248 // things if we don't select it here).
1249 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0), NumBits
);
1251 for (unsigned i
= 0; i
< NumBits
; ++i
)
1252 if (((Mask
>> i
) & 1) == 1)
1253 Bits
[i
] = (*LHSBits
)[i
];
1255 // AND instruction masks this bit. If the input is already zero,
1256 // we have nothing to do here. Otherwise, make the bit ConstZero.
1257 if ((*LHSBits
)[i
].isZero())
1258 Bits
[i
] = (*LHSBits
)[i
];
1260 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1263 return std::make_pair(Interesting
, &Bits
);
// Combining two operands bit by bit: each result bit comes from whichever
// operand is not zero at that position; AllDisjoint is cleared when neither
// operand bit is zero.
1267 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1268 const auto &RHSBits
= *getValueBits(V
.getOperand(1), NumBits
).second
;
1270 bool AllDisjoint
= true;
1271 SDValue LastVal
= SDValue();
1272 unsigned LastIdx
= 0;
1273 for (unsigned i
= 0; i
< NumBits
; ++i
) {
1274 if (LHSBits
[i
].isZero() && RHSBits
[i
].isZero()) {
1275 // If both inputs are known to be zero and one is ConstZero and
1276 // another is VariableKnownToBeZero, we can select whichever
1277 // we like. To minimize the number of bit groups, we select
1278 // VariableKnownToBeZero if this bit is the next bit of the same
1279 // input variable from the previous bit. Otherwise, we select
1281 if (LHSBits
[i
].hasValue() && LHSBits
[i
].getValue() == LastVal
&&
1282 LHSBits
[i
].getValueBitIndex() == LastIdx
+ 1)
1283 Bits
[i
] = LHSBits
[i
];
1284 else if (RHSBits
[i
].hasValue() && RHSBits
[i
].getValue() == LastVal
&&
1285 RHSBits
[i
].getValueBitIndex() == LastIdx
+ 1)
1286 Bits
[i
] = RHSBits
[i
];
1288 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1290 else if (LHSBits
[i
].isZero())
1291 Bits
[i
] = RHSBits
[i
];
1292 else if (RHSBits
[i
].isZero())
1293 Bits
[i
] = LHSBits
[i
];
1295 AllDisjoint
= false;
1298 // We remember the value and bit index of this bit.
1299 if (Bits
[i
].hasValue()) {
1300 LastVal
= Bits
[i
].getValue();
1301 LastIdx
= Bits
[i
].getValueBitIndex();
1304 if (LastVal
) LastVal
= SDValue();
1312 return std::make_pair(Interesting
= true, &Bits
);
1314 case ISD::ZERO_EXTEND
: {
1315 // We support only the case with zero extension from i32 to i64 so far.
1316 if (V
.getValueType() != MVT::i64
||
1317 V
.getOperand(0).getValueType() != MVT::i32
)
1320 const SmallVector
<ValueBit
, 64> *LHSBits
;
1321 const unsigned NumOperandBits
= 32;
1322 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0),
// The low 32 bits come from the operand; the bits above are constant zero.
1325 for (unsigned i
= 0; i
< NumOperandBits
; ++i
)
1326 Bits
[i
] = (*LHSBits
)[i
];
1328 for (unsigned i
= NumOperandBits
; i
< NumBits
; ++i
)
1329 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1331 return std::make_pair(Interesting
, &Bits
);
1333 case ISD::TRUNCATE
: {
1334 EVT FromType
= V
.getOperand(0).getValueType();
1335 EVT ToType
= V
.getValueType();
1336 // We support only the case with truncate from i64 to i32.
1337 if (FromType
!= MVT::i64
|| ToType
!= MVT::i32
)
1339 const unsigned NumAllBits
= FromType
.getSizeInBits();
1340 SmallVector
<ValueBit
, 64> *InBits
;
1341 std::tie(Interesting
, InBits
) = getValueBits(V
.getOperand(0),
1343 const unsigned NumValidBits
= ToType
.getSizeInBits();
1345 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1346 // So, we cannot include this truncate.
1347 bool UseUpper32bit
= false;
1348 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1349 if ((*InBits
)[i
].hasValue() && (*InBits
)[i
].getValueBitIndex() >= 32) {
1350 UseUpper32bit
= true;
// Copy through the bits that survive the truncation.
1356 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1357 Bits
[i
] = (*InBits
)[i
];
1359 return std::make_pair(Interesting
, &Bits
);
1361 case ISD::AssertZext
: {
1362 // For AssertZext, we look through the operand and
1363 // mark the bits known to be zero.
1364 const SmallVector
<ValueBit
, 64> *LHSBits
;
1365 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0),
1368 EVT FromType
= cast
<VTSDNode
>(V
.getOperand(1))->getVT();
1369 const unsigned NumValidBits
= FromType
.getSizeInBits();
1370 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1371 Bits
[i
] = (*LHSBits
)[i
];
// NOTE(review): getValue() and getValueBitIndex() both assert hasValue().
// If an operand bit in this range is ConstZero (the shift, mask and
// zero-extend cases above produce such bits), the calls below would trip
// that assertion. Confirm the operand can never carry ConstZero bits here,
// or copy such bits through unchanged.
1373 // These bits are known to be zero.
1374 for (unsigned i
= NumValidBits
; i
< NumBits
; ++i
)
1375 Bits
[i
] = ValueBit((*LHSBits
)[i
].getValue(),
1376 (*LHSBits
)[i
].getValueBitIndex(),
1377 ValueBit::VariableKnownToBeZero
);
1379 return std::make_pair(Interesting
, &Bits
);
// Zero-extending load (result 0 only): bits within the memory type are
// ordinary bits of V; the bits above are bits of V known to be zero.
1382 LoadSDNode
*LD
= cast
<LoadSDNode
>(V
);
1383 if (ISD::isZEXTLoad(V
.getNode()) && V
.getResNo() == 0) {
1384 EVT VT
= LD
->getMemoryVT();
1385 const unsigned NumValidBits
= VT
.getSizeInBits();
1387 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1388 Bits
[i
] = ValueBit(V
, i
);
1390 // These bits are known to be zero.
1391 for (unsigned i
= NumValidBits
; i
< NumBits
; ++i
)
1392 Bits
[i
] = ValueBit(V
, i
, ValueBit::VariableKnownToBeZero
);
1394 // Zero-extending load itself cannot be optimized. So, it is not
1395 // interesting by itself though it gives useful information.
1396 return std::make_pair(Interesting
= false, &Bits
);
// Generic representation: bit i of the result is bit i of V itself.
1401 for (unsigned i
= 0; i
< NumBits
; ++i
)
1402 Bits
[i
] = ValueBit(V
, i
);
1404 return std::make_pair(Interesting
= false, &Bits
);
1407 // For each value (except the constant ones), compute the left-rotate amount
1408 // to get it from its original to final position.
// Fills RLAmt with one entry per element of Bits. Bits that are zero and
// carry no value get the sentinel UINT32_MAX.
// NOTE(review): the branch that selects between the rotation formulas is not
// fully visible in this listing; only the Bits.size() - (VBI - i) form
// appears.
1409 void computeRotationAmounts() {
1411 RLAmt
.resize(Bits
.size());
1412 for (unsigned i
= 0; i
< Bits
.size(); ++i
)
1413 if (Bits
[i
].hasValue()) {
// VBI is the bit's index in its source value; i is its index in the result.
1414 unsigned VBI
= Bits
[i
].getValueBitIndex();
1418 RLAmt
[i
] = Bits
.size() - (VBI
- i
);
1419 } else if (Bits
[i
].isZero()) {
1421 RLAmt
[i
] = UINT32_MAX
;
1423 llvm_unreachable("Unknown value bit type");
1427 // Collect groups of consecutive bits with the same underlying value and
1428 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1429 // they break up groups.
// Scans Bits/RLAmt from index 0 upward and appends a BitGroup to BitGroups
// whenever a run with a non-null value ends. Afterwards the first and last
// groups are merged when they describe the same value and rotation and touch
// both ends of the bit range.
// NOTE(review): some statements (e.g. the tails of the push_back calls and
// the `continue` lines) are not visible in this listing.
1430 void collectBitGroups(bool LateMask
) {
// State of the run currently being extended.
1433 unsigned LastRLAmt
= RLAmt
[0];
1434 SDValue LastValue
= Bits
[0].hasValue() ? Bits
[0].getValue() : SDValue();
1435 unsigned LastGroupStartIdx
= 0;
1436 bool IsGroupOfZeros
= !Bits
[LastGroupStartIdx
].hasValue();
1437 for (unsigned i
= 1; i
< Bits
.size(); ++i
) {
1438 unsigned ThisRLAmt
= RLAmt
[i
];
1439 SDValue ThisValue
= Bits
[i
].hasValue() ? Bits
[i
].getValue() : SDValue();
// With late masking, a bit without a value inherits the previous bit's value
// and rotation, so it does not end the run.
1440 if (LateMask
&& !ThisValue
) {
1441 ThisValue
= LastValue
;
1442 ThisRLAmt
= LastRLAmt
;
1443 // If we're doing late masking, then the first bit group always starts
1444 // at zero (even if the first bits were zero).
1445 if (BitGroups
.empty())
1446 LastGroupStartIdx
= 0;
1449 // If this bit is known to be zero and the current group is a bit group
1450 // of zeros, we do not need to terminate the current bit group even if the
1451 // Value or RLAmt does not match here. Instead, we terminate this group
1452 // when the first non-zero bit appears later.
1453 if (IsGroupOfZeros
&& Bits
[i
].isZero())
1456 // If this bit has the same underlying value and the same rotate factor as
1457 // the last one, then they're part of the same group.
1458 if (ThisRLAmt
== LastRLAmt
&& ThisValue
== LastValue
)
1459 // We cannot continue the current group if this bit is not known to
1460 // be zero in a bit group of zeros.
1461 if (!(IsGroupOfZeros
&& ThisValue
&& !Bits
[i
].isZero()))
// The run ended: record it (only when it has a value) and start a new one
// at bit i.
1464 if (LastValue
.getNode())
1465 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1467 LastRLAmt
= ThisRLAmt
;
1468 LastValue
= ThisValue
;
1469 LastGroupStartIdx
= i
;
1470 IsGroupOfZeros
= !Bits
[LastGroupStartIdx
].hasValue();
// Record the final run.
1472 if (LastValue
.getNode())
1473 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1476 if (BitGroups
.empty())
1479 // We might be able to combine the first and last groups.
1480 if (BitGroups
.size() > 1) {
1481 // If the first and last groups are the same, then remove the first group
1482 // in favor of the last group, making the ending index of the last group
1483 // equal to the ending index of the to-be-removed first group.
1484 if (BitGroups
[0].StartIdx
== 0 &&
1485 BitGroups
[BitGroups
.size()-1].EndIdx
== Bits
.size()-1 &&
1486 BitGroups
[0].V
== BitGroups
[BitGroups
.size()-1].V
&&
1487 BitGroups
[0].RLAmt
== BitGroups
[BitGroups
.size()-1].RLAmt
) {
1488 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1489 BitGroups
[BitGroups
.size()-1].EndIdx
= BitGroups
[0].EndIdx
;
1490 BitGroups
.erase(BitGroups
.begin());
1495 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1496 // associated with each. If the number of groups is the same, we prefer a group
1497 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1498 // instruction. If there is a degeneracy, pick the one that occurs
1499 // first (in the final value).
// Builds one ValueRotInfo per (value, rotation key) in ValueRots from
// BitGroups, then copies the entries into ValueRotsVec and sorts that vector
// with ValueRotInfo's operator<.
// NOTE(review): a few statements of the first loop are not visible in this
// listing.
1500 void collectValueRotInfo() {
1503 for (auto &BG
: BitGroups
) {
// Repl32 groups get a key offset by 64, which keeps them in separate entries
// from non-Repl32 groups with the same rotation.
1504 unsigned RLAmtKey
= BG
.RLAmt
+ (BG
.Repl32
? 64 : 0);
1505 ValueRotInfo
&VRI
= ValueRots
[std::make_pair(BG
.V
, RLAmtKey
)];
1507 VRI
.RLAmt
= BG
.RLAmt
;
1508 VRI
.Repl32
= BG
.Repl32
;
// Track the lowest start index among the groups sharing this entry.
1510 VRI
.FirstGroupStartIdx
= std::min(VRI
.FirstGroupStartIdx
, BG
.StartIdx
);
1513 // Now that we've collected the various ValueRotInfo instances, we need to
1515 ValueRotsVec
.clear();
1516 for (auto &I
: ValueRots
) {
1517 ValueRotsVec
.push_back(I
.second
);
1519 llvm::sort(ValueRotsVec
);
1522 // In 64-bit mode, rlwinm and friends have a rotation operator that
1523 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1524 // indices of these instructions can only be in the lower 32 bits, so they
1525 // can only represent some 64-bit bit groups. However, when they can be used,
1526 // the 32-bit replication can be used to represent, as a single bit group,
1527 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1528 // groups when possible. Returns true if any of the bit groups were
//
// Two passes over BitGroups: the first examines each group as a Repl32
// candidate; the second merges adjacent Repl32 groups that share a value and
// rotation, including the wrap-around case where a group covering bits
// 32..63 joins its neighbours.
// NOTE(review): several statements (the lambdas' return lines, the flag
// assignments inside the first loop, some conditions) are not visible in this
// listing.
1530 void assignRepl32BitGroups() {
1531 // If we have bits like this:
1533 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1534 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1535 // Groups: | RLAmt = 8 | RLAmt = 40 |
1537 // But, making use of a 32-bit operation that replicates the low-order 32
1538 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
// Inspects every value-carrying bit of the group (handling groups that wrap
// past the top bit as two ranges) and tests whether its source bit index is
// 32 or higher.
1541 auto IsAllLow32
= [this](BitGroup
& BG
) {
1542 if (BG
.StartIdx
<= BG
.EndIdx
) {
1543 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
) {
1544 if (!Bits
[i
].hasValue())
1546 if (Bits
[i
].getValueBitIndex() >= 32)
// Wrapped group: StartIdx..top, then 0..EndIdx.
1550 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
) {
1551 if (!Bits
[i
].hasValue())
1553 if (Bits
[i
].getValueBitIndex() >= 32)
1556 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
) {
1557 if (!Bits
[i
].hasValue())
1559 if (Bits
[i
].getValueBitIndex() >= 32)
1567 for (auto &BG
: BitGroups
) {
1568 // If this bit group has RLAmt of 0 and will not be merged with
1569 // another bit group, we don't benefit from Repl32. We don't mark
1570 // such group to give more freedom for later instruction selection.
1571 if (BG
.RLAmt
== 0) {
// Looks for another group with the same value whose rotation is 0 or 32.
1572 auto PotentiallyMerged
= [this](BitGroup
& BG
) {
1573 for (auto &BG2
: BitGroups
)
1574 if (&BG
!= &BG2
&& BG
.V
== BG2
.V
&&
1575 (BG2
.RLAmt
== 0 || BG2
.RLAmt
== 32))
1579 if (!PotentiallyMerged(BG
))
// Only groups lying entirely within the low 32 result bits are candidates.
1582 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32) {
1583 if (IsAllLow32(BG
)) {
1584 if (BG
.RLAmt
>= 32) {
1591 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
1592 << BG
.V
.getNode() << " RLAmt = " << BG
.RLAmt
<< " ["
1593 << BG
.StartIdx
<< ", " << BG
.EndIdx
<< "]\n");
1598 // Now walk through the bit groups, consolidating where possible.
1599 for (auto I
= BitGroups
.begin(); I
!= BitGroups
.end();) {
1600 // We might want to remove this bit group by merging it with the previous
1601 // group (which might be the ending group).
1602 auto IP
= (I
== BitGroups
.begin()) ?
1603 std::prev(BitGroups
.end()) : std::prev(I
);
// Merge I into IP when both are Repl32, share value and rotation, and I
// starts right after IP ends (modulo 64).
1604 if (I
->Repl32
&& IP
->Repl32
&& I
->V
== IP
->V
&& I
->RLAmt
== IP
->RLAmt
&&
1605 I
->StartIdx
== (IP
->EndIdx
+ 1) % 64 && I
!= IP
) {
1607 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
1608 << I
->V
.getNode() << " RLAmt = " << I
->RLAmt
<< " ["
1609 << I
->StartIdx
<< ", " << I
->EndIdx
1610 << "] with group with range [" << IP
->StartIdx
<< ", "
1611 << IP
->EndIdx
<< "]\n");
1613 IP
->EndIdx
= I
->EndIdx
;
1614 IP
->Repl32CR
= IP
->Repl32CR
|| I
->Repl32CR
;
1615 IP
->Repl32Coalesced
= true;
1616 I
= BitGroups
.erase(I
);
1619 // There is a special case worth handling: If there is a single group
1620 // covering the entire upper 32 bits, and it can be merged with both
1621 // the next and previous groups (which might be the same group), then
1622 // do so. If it is the same group (so there will be only one group in
1623 // total), then we need to reverse the order of the range so that it
1624 // covers the entire 64 bits.
1625 if (I
->StartIdx
== 32 && I
->EndIdx
== 63) {
1626 assert(std::next(I
) == BitGroups
.end() &&
1627 "bit group ends at index 63 but there is another?");
// IN is the group that follows I cyclically, i.e. the first group.
1628 auto IN
= BitGroups
.begin();
1630 if (IP
->Repl32
&& IN
->Repl32
&& I
->V
== IP
->V
&& I
->V
== IN
->V
&&
1631 (I
->RLAmt
% 32) == IP
->RLAmt
&& (I
->RLAmt
% 32) == IN
->RLAmt
&&
1632 IP
->EndIdx
== 31 && IN
->StartIdx
== 0 && I
!= IP
&&
1635 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I
->V
.getNode()
1636 << " RLAmt = " << I
->RLAmt
<< " [" << I
->StartIdx
1637 << ", " << I
->EndIdx
1638 << "] with 32-bit replicated groups with ranges ["
1639 << IP
->StartIdx
<< ", " << IP
->EndIdx
<< "] and ["
1640 << IN
->StartIdx
<< ", " << IN
->EndIdx
<< "]\n");
1643 // There is only one other group; change it to cover the whole
1644 // range (backward, so that it can still be Repl32 but cover the
1645 // whole 64-bit range).
1648 IP
->Repl32CR
= IP
->Repl32CR
|| I
->RLAmt
>= 32;
1649 IP
->Repl32Coalesced
= true;
1650 I
= BitGroups
.erase(I
);
1652 // There are two separate groups, one before this group and one
1653 // after us (at the beginning). We're going to remove this group,
1654 // but also the group at the very beginning.
1655 IP
->EndIdx
= IN
->EndIdx
;
1656 IP
->Repl32CR
= IP
->Repl32CR
|| IN
->Repl32CR
|| I
->RLAmt
>= 32;
1657 IP
->Repl32Coalesced
= true;
1658 I
= BitGroups
.erase(I
);
1659 BitGroups
.erase(BitGroups
.begin());
1662 // This must be the last group in the vector (and we might have
1663 // just invalidated the iterator above), so break here.
// Returns Imm as an i32 target constant created through CurDAG at location
// dl.
1673 SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
1674 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
// Builds a 64-bit mask from Bits by testing hasValue() on each bit and
// setting bit i of Mask.
// NOTE(review): the declaration of Mask, the statement following the
// hasValue() test and the returned expression are not visible in this
// listing, so which bits end up set in the result cannot be confirmed here.
1677 uint64_t getZerosMask() {
1679 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1680 if (Bits
[i
].hasValue())
1682 Mask
|= (UINT64_C(1) << i
);
1688 // This method extends an input value to 64 bit if input is 32-bit integer.
1689 // While selecting instructions in BitPermutationSelector in 64-bit mode,
1690 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1691 // In such case, we extend it to 64 bit to be consistent with other values.
// Values that are already 64 bits wide are handled by the first check; any
// other value must be exactly 32 bits wide (asserted). The extension is built
// from an IMPLICIT_DEF node and an INSERT_SUBREG node using the sub_32
// subregister index.
// NOTE(review): the operand lists of the two machine nodes and the return
// statements are not visible in this listing.
1692 SDValue
ExtendToInt64(SDValue V
, const SDLoc
&dl
) {
1693 if (V
.getValueSizeInBits() == 64)
1696 assert(V
.getValueSizeInBits() == 32);
1697 SDValue SubRegIdx
= CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
1698 SDValue ImDef
= SDValue(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
,
1700 SDValue ExtVal
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
,
// Counterpart of ExtendToInt64: values that are already 32 bits wide are
// handled by the first check; any other value must be exactly 64 bits wide
// (asserted) and is narrowed with an EXTRACT_SUBREG of the sub_32
// subregister, producing an i32.
// NOTE(review): the return statements are not visible in this listing.
1706 SDValue
TruncateToInt32(SDValue V
, const SDLoc
&dl
) {
1707 if (V
.getValueSizeInBits() == 32)
1710 assert(V
.getValueSizeInBits() == 64);
1711 SDValue SubRegIdx
= CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
1712 SDValue SubVal
= SDValue(CurDAG
->getMachineNode(PPC::EXTRACT_SUBREG
, dl
,
1713 MVT::i32
, V
, SubRegIdx
), 0);
1717 // Depending on the number of groups for a particular value, it might be
1718 // better to rotate, mask explicitly (using andi/andis), and then or the
1719 // result. Select this part of the result first.
//
// For each entry of ValueRotsVec, the number of instructions a
// rotate + andi/andis (+ or) sequence would need is compared with the entry's
// group count. When masking is chosen, the sequence is emitted, combined into
// Res, the instruction count is added to *InstCnt (if non-null) and the
// matching bit groups are erased.
// NOTE(review): several statements (the Mask computation, `continue` lines,
// the conditions guarding node creation, operand tails) are not visible in
// this listing.
1720 void SelectAndParts32(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
1721 if (BPermRewriterNoMasking
)
1724 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
// Visit the bits that belong to this (value, rotation) pair.
1726 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1727 if (!Bits
[i
].hasValue() || Bits
[i
].getValue() != VRI
.V
)
1729 if (RLAmt
[i
] != VRI
.RLAmt
)
1734 // Compute the masks for andi/andis that would be necessary.
1735 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
1736 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1737 "No set bits in mask for value bit groups");
1738 bool NeedsRotate
= VRI
.RLAmt
!= 0;
1740 // We're trying to minimize the number of instructions. If we have one
1741 // group, using one of andi/andis can break even. If we have three
1742 // groups, we can use both andi and andis and break even (to use both
1743 // andi and andis we also need to or the results together). We need four
1744 // groups if we also need to rotate. To use andi/andis we need to do more
1745 // than break even because rotate-and-mask instructions tend to be easier
1748 // FIXME: We've biased here against using andi/andis, which is right for
1749 // POWER cores, but not optimal everywhere. For example, on the A2,
1750 // andi/andis have single-cycle latency whereas the rotate-and-mask
1751 // instructions take two cycles, and it would be better to bias toward
1752 // andi/andis in break-even cases.
// One instruction each for: the rotate, the andi, the andis, the or joining
// andi and andis, and the or into an already-existing Res.
1754 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
1755 (unsigned) (ANDIMask
!= 0) +
1756 (unsigned) (ANDISMask
!= 0) +
1757 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0) +
1758 (unsigned) (bool) Res
;
1760 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode()
1761 << " RL: " << VRI
.RLAmt
<< ":"
1762 << "\n\t\t\tisel using masking: " << NumAndInsts
1763 << " using rotates: " << VRI
.NumGroups
<< "\n");
1765 if (NumAndInsts
>= VRI
.NumGroups
)
1768 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1770 if (InstCnt
) *InstCnt
+= NumAndInsts
;
// Rotate the whole 32-bit value (mask 0..31) or use it unrotated.
1775 { TruncateToInt32(VRI
.V
, dl
), getI32Imm(VRI
.RLAmt
, dl
),
1776 getI32Imm(0, dl
), getI32Imm(31, dl
) };
1777 VRot
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
1780 VRot
= TruncateToInt32(VRI
.V
, dl
);
1783 SDValue ANDIVal
, ANDISVal
;
1785 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo
, dl
, MVT::i32
,
1786 VRot
, getI32Imm(ANDIMask
, dl
)), 0);
1788 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo
, dl
, MVT::i32
,
1789 VRot
, getI32Imm(ANDISMask
, dl
)), 0);
1793 TotalVal
= ANDISVal
;
1797 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1798 ANDIVal
, ANDISVal
), 0);
1803 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1806 // Now, remove all groups with this underlying value and rotation
1808 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1809 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
1814 // Instruction selection for the 32-bit case.
//
// Steps visible here: (1) reset *InstCnt if provided, (2) let
// SelectAndParts32 handle the parts better done with andi/andis, (3) if
// nothing is selected yet and no early masking is needed, start from the
// highest-priority rotated value, (4) add the remaining groups with
// RLWINM (first) or RLWIMI (insert), (5) apply the zeros mask with
// andi/andis. Returns the node of the final value Res.
// NOTE(review): the declarations of dl/Res, several conditions and closing
// lines are not visible in this listing.
1815 SDNode
*Select32(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
1819 if (InstCnt
) *InstCnt
= 0;
1821 // Take care of cases that should use andi/andis first.
1822 SelectAndParts32(dl
, Res
, InstCnt
);
1824 // If we've not yet selected a 'starting' instruction, and we have no zeros
1825 // to fill in, select the (Value, RLAmt) with the highest priority (largest
1826 // number of groups), and start with this rotated value.
1827 if ((!NeedMask
|| LateMask
) && !Res
) {
1828 ValueRotInfo
&VRI
= ValueRotsVec
[0];
1830 if (InstCnt
) *InstCnt
+= 1;
1832 { TruncateToInt32(VRI
.V
, dl
), getI32Imm(VRI
.RLAmt
, dl
),
1833 getI32Imm(0, dl
), getI32Imm(31, dl
) };
1834 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
),
1837 Res
= TruncateToInt32(VRI
.V
, dl
);
1840 // Now, remove all groups with this underlying value and rotation factor.
1841 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1842 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
// One instruction per remaining group.
1846 if (InstCnt
) *InstCnt
+= BitGroups
.size();
1848 // Insert the other groups (one at a time).
1849 for (auto &BG
: BitGroups
) {
// The mask operands are computed as Bits.size() - index - 1 from the group's
// EndIdx and StartIdx.
1852 { TruncateToInt32(BG
.V
, dl
), getI32Imm(BG
.RLAmt
, dl
),
1853 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
1854 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
1855 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
1858 { Res
, TruncateToInt32(BG
.V
, dl
), getI32Imm(BG
.RLAmt
, dl
),
1859 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
1860 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
1861 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
), 0);
// Mask step: split the 32-bit zeros mask into its low and high 16-bit
// halves for andi/andis.
1866 unsigned Mask
= (unsigned) getZerosMask();
1868 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
1869 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1870 "No set bits in zeros mask?");
1872 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
1873 (unsigned) (ANDISMask
!= 0) +
1874 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
1876 SDValue ANDIVal
, ANDISVal
;
1878 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo
, dl
, MVT::i32
,
1879 Res
, getI32Imm(ANDIMask
, dl
)), 0);
1881 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo
, dl
, MVT::i32
,
1882 Res
, getI32Imm(ANDISMask
, dl
)), 0);
1889 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1890 ANDIVal
, ANDISVal
), 0);
1893 return Res
.getNode();
// Computes the instruction-notation mask bounds for a 64-bit rotate-and-mask
// and tests them against the same special positions that SelectRotMask64 and
// SelectRotMaskIns64 check (mask ending at bit 63, mask starting at bit 0,
// mask ending at 63 - RLAmt).
// NOTE(review): the final parameter (IsIns, judging by its use below) and all
// return statements are not visible in this listing, so the returned counts
// cannot be confirmed here.
1896 unsigned SelectRotMask64Count(unsigned RLAmt
, bool Repl32
,
1897 unsigned MaskStart
, unsigned MaskEnd
,
1899 // In the notation used by the instructions, 'start' and 'end' are reversed
1900 // because bits are counted from high to low order.
1901 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1902 InstMaskEnd
= 64 - MaskStart
- 1;
1907 if ((!IsIns
&& (InstMaskEnd
== 63 || InstMaskStart
== 0)) ||
1908 InstMaskEnd
== 63 - RLAmt
)
1914 // For 64-bit values, not all combinations of rotates and masks are
1915 // available. Produce one if it is available.
//
// Emits a rotate-left of V by RLAmt keeping the bits MaskStart..MaskEnd
// (numbered from the low-order end). Forms visible here: RLWINM8 for the
// 32-bit replicated case, RLDICL when the instruction mask ends at bit 63,
// RLDICR when it starts at bit 0, and RLDIC when it ends at 63 - RLAmt.
// Otherwise two instructions are produced by calling this function twice.
// *InstCnt, if non-null, is incremented once per emitted instruction.
// NOTE(review): the `if (Repl32)` guard and some operand tails are not
// visible in this listing.
1916 SDValue
SelectRotMask64(SDValue V
, const SDLoc
&dl
, unsigned RLAmt
,
1917 bool Repl32
, unsigned MaskStart
, unsigned MaskEnd
,
1918 unsigned *InstCnt
= nullptr) {
1919 // In the notation used by the instructions, 'start' and 'end' are reversed
1920 // because bits are counted from high to low order.
1921 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1922 InstMaskEnd
= 64 - MaskStart
- 1;
1924 if (InstCnt
) *InstCnt
+= 1;
1927 // This rotation amount assumes that the lower 32 bits of the quantity
1928 // are replicated in the high 32 bits by the rotation operator (which is
1929 // done by rlwinm and friends).
1930 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
1931 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
// The 32-bit form takes mask bounds relative to the low word, hence "- 32".
1933 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1934 getI32Imm(InstMaskStart
- 32, dl
), getI32Imm(InstMaskEnd
- 32, dl
) };
1935 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM8
, dl
, MVT::i64
,
1939 if (InstMaskEnd
== 63) {
1941 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1942 getI32Imm(InstMaskStart
, dl
) };
1943 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Ops
), 0);
1946 if (InstMaskStart
== 0) {
1948 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1949 getI32Imm(InstMaskEnd
, dl
) };
1950 return SDValue(CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Ops
), 0);
1953 if (InstMaskEnd
== 63 - RLAmt
) {
1955 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1956 getI32Imm(InstMaskStart
, dl
) };
1957 return SDValue(CurDAG
->getMachineNode(PPC::RLDIC
, dl
, MVT::i64
, Ops
), 0);
1960 // We cannot do this with a single instruction, so we'll use two. The
1961 // problem is that we're not free to choose both a rotation amount and mask
1962 // start and end independently. We can choose an arbitrary mask start and
1963 // end, but then the rotation amount is fixed. Rotation, however, can be
1964 // inverted, and so by applying an "inverse" rotation first, we can get the
1966 if (InstCnt
) *InstCnt
+= 1;
1968 // The rotation mask for the second instruction must be MaskStart.
1969 unsigned RLAmt2
= MaskStart
;
1970 // The first instruction must rotate V so that the overall rotation amount
1972 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
// First a full-width rotate by RLAmt1 (mask 0..63), then rotate by RLAmt2
// with the requested mask. Neither call passes InstCnt; the second
// instruction was already counted just above.
1974 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
1975 return SelectRotMask64(V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
1978 // For 64-bit values, not all combinations of rotates and masks are
1979 // available. Produce a rotate-mask-and-insert if one is available.
1980 SDValue
SelectRotMaskIns64(SDValue Base
, SDValue V
, const SDLoc
&dl
,
1981 unsigned RLAmt
, bool Repl32
, unsigned MaskStart
,
1982 unsigned MaskEnd
, unsigned *InstCnt
= nullptr) {
1983 // In the notation used by the instructions, 'start' and 'end' are reversed
1984 // because bits are counted from high to low order.
1985 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1986 InstMaskEnd
= 64 - MaskStart
- 1;
1988 if (InstCnt
) *InstCnt
+= 1;
1991 // This rotation amount assumes that the lower 32 bits of the quantity
1992 // are replicated in the high 32 bits by the rotation operator (which is
1993 // done by rlwinm and friends).
1994 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
1995 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
1997 { ExtendToInt64(Base
, dl
), ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1998 getI32Imm(InstMaskStart
- 32, dl
), getI32Imm(InstMaskEnd
- 32, dl
) };
1999 return SDValue(CurDAG
->getMachineNode(PPC::RLWIMI8
, dl
, MVT::i64
,
2003 if (InstMaskEnd
== 63 - RLAmt
) {
2005 { ExtendToInt64(Base
, dl
), ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2006 getI32Imm(InstMaskStart
, dl
) };
2007 return SDValue(CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
), 0);
2010 // We cannot do this with a single instruction, so we'll use two. The
2011 // problem is that we're not free to choose both a rotation amount and mask
2012 // start and end independently. We can choose an arbitrary mask start and
2013 // end, but then the rotation amount is fixed. Rotation, however, can be
2014 // inverted, and so by applying an "inverse" rotation first, we can get the
2016 if (InstCnt
) *InstCnt
+= 1;
2018 // The rotation amount for the second instruction must be MaskStart.
2019 unsigned RLAmt2
= MaskStart
;
2020 // The first instruction must rotate V so that the overall rotation amount
2022 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
2024 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
2025 return SelectRotMaskIns64(Base
, V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
2028 void SelectAndParts64(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
2029 if (BPermRewriterNoMasking
)
2032 // The idea here is the same as in the 32-bit version, but with additional
2033 // complications from the fact that Repl32 might be true. Because we
2034 // aggressively convert bit groups to Repl32 form (which, for small
2035 // rotation factors, involves no other change), and then coalesce, it might
2036 // be the case that a single 64-bit masking operation could handle both
2037 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2038 // form allowed coalescing, then we must use a 32-bit rotation in order to
2039 // completely capture the new combined bit group.
2041 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
2044 // We need to add to the mask all bits from the associated bit groups.
2045 // If Repl32 is false, we need to add bits from bit groups that have
2046 // Repl32 true, but are trivially convertible to Repl32 false. Such a
2047 // group is trivially convertible if it overlaps only with the lower 32
2048 // bits, and the group has not been coalesced.
2049 auto MatchingBG
= [VRI
](const BitGroup
&BG
) {
2053 unsigned EffRLAmt
= BG
.RLAmt
;
2054 if (!VRI
.Repl32
&& BG
.Repl32
) {
2055 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
<= BG
.EndIdx
&&
2056 !BG
.Repl32Coalesced
) {
2062 } else if (VRI
.Repl32
!= BG
.Repl32
) {
2066 return VRI
.RLAmt
== EffRLAmt
;
2069 for (auto &BG
: BitGroups
) {
2070 if (!MatchingBG(BG
))
2073 if (BG
.StartIdx
<= BG
.EndIdx
) {
2074 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
)
2075 Mask
|= (UINT64_C(1) << i
);
2077 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
)
2078 Mask
|= (UINT64_C(1) << i
);
2079 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
)
2080 Mask
|= (UINT64_C(1) << i
);
2084 // We can use the 32-bit andi/andis technique if the mask does not
2085 // require any higher-order bits. This can save an instruction compared
2086 // to always using the general 64-bit technique.
2087 bool Use32BitInsts
= isUInt
<32>(Mask
);
2088 // Compute the masks for andi/andis that would be necessary.
2089 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
2090 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
2092 bool NeedsRotate
= VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
));
2094 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
2095 (unsigned) (bool) Res
;
2097 NumAndInsts
+= (unsigned) (ANDIMask
!= 0) + (unsigned) (ANDISMask
!= 0) +
2098 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
2100 NumAndInsts
+= selectI64ImmInstrCount(Mask
) + /* and */ 1;
2102 unsigned NumRLInsts
= 0;
2103 bool FirstBG
= true;
2104 bool MoreBG
= false;
2105 for (auto &BG
: BitGroups
) {
2106 if (!MatchingBG(BG
)) {
2111 SelectRotMask64Count(BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
, BG
.EndIdx
,
2116 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode()
2117 << " RL: " << VRI
.RLAmt
<< (VRI
.Repl32
? " (32):" : ":")
2118 << "\n\t\t\tisel using masking: " << NumAndInsts
2119 << " using rotates: " << NumRLInsts
<< "\n");
2121 // When we'd use andi/andis, we bias toward using the rotates (andi only
2122 // has a record form, and is cracked on POWER cores). However, when using
2123 // general 64-bit constant formation, bias toward the constant form,
2124 // because that exposes more opportunities for CSE.
2125 if (NumAndInsts
> NumRLInsts
)
2127 // When merging multiple bit groups, an 'or' instruction is used.
2128 // But when a rotate is used, rldimi can insert the rotated value into any
2129 // register, so the 'or' instruction can be avoided.
2130 if ((Use32BitInsts
|| MoreBG
) && NumAndInsts
== NumRLInsts
)
2133 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2135 if (InstCnt
) *InstCnt
+= NumAndInsts
;
2138 // We actually need to generate a rotation if we have a non-zero rotation
2139 // factor or, in the Repl32 case, if we care about any of the
2140 // higher-order replicated bits. In the latter case, we generate a mask
2141 // backward so that it actually includes the entire 64 bits.
2142 if (VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
)))
2143 VRot
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
2144 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63);
2149 if (Use32BitInsts
) {
2150 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2151 "No set bits in mask when using 32-bit ands for 64-bit value");
2153 SDValue ANDIVal
, ANDISVal
;
2155 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo8
, dl
, MVT::i64
,
2156 ExtendToInt64(VRot
, dl
),
2157 getI32Imm(ANDIMask
, dl
)),
2160 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo8
, dl
, MVT::i64
,
2161 ExtendToInt64(VRot
, dl
),
2162 getI32Imm(ANDISMask
, dl
)),
2166 TotalVal
= ANDISVal
;
2170 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2171 ExtendToInt64(ANDIVal
, dl
), ANDISVal
), 0);
2173 TotalVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
2175 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
2176 ExtendToInt64(VRot
, dl
), TotalVal
),
2183 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2184 ExtendToInt64(Res
, dl
), TotalVal
),
2187 // Now, remove all groups with this underlying value and rotation
2189 eraseMatchingBitGroups(MatchingBG
);
2193 // Instruction selection for the 64-bit case.
2194 SDNode
*Select64(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
2198 if (InstCnt
) *InstCnt
= 0;
2200 // Take care of cases that should use andi/andis first.
2201 SelectAndParts64(dl
, Res
, InstCnt
);
2203 // If we've not yet selected a 'starting' instruction, and we have no zeros
2204 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2205 // number of groups), and start with this rotated value.
2206 if ((!NeedMask
|| LateMask
) && !Res
) {
2207 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2208 // groups will come first, and so the VRI representing the largest number
2209 // of groups might not be first (it might be the first Repl32 groups).
2210 unsigned MaxGroupsIdx
= 0;
2211 if (!ValueRotsVec
[0].Repl32
) {
2212 for (unsigned i
= 0, ie
= ValueRotsVec
.size(); i
< ie
; ++i
)
2213 if (ValueRotsVec
[i
].Repl32
) {
2214 if (ValueRotsVec
[i
].NumGroups
> ValueRotsVec
[0].NumGroups
)
2220 ValueRotInfo
&VRI
= ValueRotsVec
[MaxGroupsIdx
];
2221 bool NeedsRotate
= false;
2224 } else if (VRI
.Repl32
) {
2225 for (auto &BG
: BitGroups
) {
2226 if (BG
.V
!= VRI
.V
|| BG
.RLAmt
!= VRI
.RLAmt
||
2227 BG
.Repl32
!= VRI
.Repl32
)
2230 // We don't need a rotate if the bit group is confined to the lower
2232 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
< BG
.EndIdx
)
2241 Res
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
2242 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63,
2247 // Now, remove all groups with this underlying value and rotation factor.
2249 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
2250 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
&&
2251 BG
.Repl32
== VRI
.Repl32
;
2255 // Because 64-bit rotates are more flexible than inserts, we might have a
2256 // preference regarding which one we do first (to save one instruction).
2258 for (auto I
= BitGroups
.begin(), IE
= BitGroups
.end(); I
!= IE
; ++I
) {
2259 if (SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
2261 SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
2263 if (I
!= BitGroups
.begin()) {
2266 BitGroups
.insert(BitGroups
.begin(), BG
);
2273 // Insert the other groups (one at a time).
2274 for (auto &BG
: BitGroups
) {
2276 Res
= SelectRotMask64(BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
,
2277 BG
.EndIdx
, InstCnt
);
2279 Res
= SelectRotMaskIns64(Res
, BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
,
2280 BG
.StartIdx
, BG
.EndIdx
, InstCnt
);
2284 uint64_t Mask
= getZerosMask();
2286 // We can use the 32-bit andi/andis technique if the mask does not
2287 // require any higher-order bits. This can save an instruction compared
2288 // to always using the general 64-bit technique.
2289 bool Use32BitInsts
= isUInt
<32>(Mask
);
2290 // Compute the masks for andi/andis that would be necessary.
2291 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
2292 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
2294 if (Use32BitInsts
) {
2295 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2296 "No set bits in mask when using 32-bit ands for 64-bit value");
2298 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
2299 (unsigned) (ANDISMask
!= 0) +
2300 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
2302 SDValue ANDIVal
, ANDISVal
;
2304 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo8
, dl
, MVT::i64
,
2305 ExtendToInt64(Res
, dl
), getI32Imm(ANDIMask
, dl
)), 0);
2307 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo8
, dl
, MVT::i64
,
2308 ExtendToInt64(Res
, dl
), getI32Imm(ANDISMask
, dl
)), 0);
2315 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2316 ExtendToInt64(ANDIVal
, dl
), ANDISVal
), 0);
2318 if (InstCnt
) *InstCnt
+= selectI64ImmInstrCount(Mask
) + /* and */ 1;
2320 SDValue MaskVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
2322 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
2323 ExtendToInt64(Res
, dl
), MaskVal
), 0);
2327 return Res
.getNode();
// Runs one selection attempt for N with the given masking strategy:
// collectBitGroups(LateMask) is called first, assignRepl32BitGroups() is
// called only when Bits holds 64 entries, and collectValueRotInfo() follows.
// The work is then handed to Select32 or Select64 according to Bits.size().
// InstCnt (defaults to nullptr) is forwarded unchanged to the width-specific
// selector.
2330 SDNode
*Select(SDNode
*N
, bool LateMask
, unsigned *InstCnt
= nullptr) {
2331 // Fill in BitGroups.
2332 collectBitGroups(LateMask
);
2333 if (BitGroups
.empty())
2336 // For 64-bit values, figure out when we can use 32-bit instructions.
2337 if (Bits
.size() == 64)
2338 assignRepl32BitGroups();
2340 // Fill in ValueRotsVec.
2341 collectValueRotInfo();
2343 if (Bits
.size() == 32) {
2344 return Select32(N
, LateMask
, InstCnt
);
// Only 32-bit and 64-bit values are expected here; anything else trips the
// assertion below.
2346 assert(Bits
.size() == 64 && "Not 64 bits here?");
2347 return Select64(N
, LateMask
, InstCnt
);
// Removes from BitGroups every element for which the predicate F returns
// true, using remove_if followed by erase.
2353 void eraseMatchingBitGroups(function_ref
<bool(const BitGroup
&)> F
) {
2354 BitGroups
.erase(remove_if(BitGroups
, F
), BitGroups
.end());
2357 SmallVector
<ValueBit
, 64> Bits
;
2360 SmallVector
<unsigned, 64> RLAmt
;
2362 SmallVector
<BitGroup
, 16> BitGroups
;
2364 DenseMap
<std::pair
<SDValue
, unsigned>, ValueRotInfo
> ValueRots
;
2365 SmallVector
<ValueRotInfo
, 16> ValueRotsVec
;
2367 SelectionDAG
*CurDAG
;
2370 BitPermutationSelector(SelectionDAG
*DAG
)
2373 // Here we try to match complex bit permutations into a set of
2374 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2375 // known to produce optimal code for common cases (like i32 byte swapping).
2376 SDNode
*Select(SDNode
*N
) {
2379 getValueBits(SDValue(N
, 0), N
->getValueType(0).getSizeInBits());
2382 Bits
= std::move(*Result
.second
);
2384 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2385 " selection for: ");
2386 LLVM_DEBUG(N
->dump(CurDAG
));
2388 // Fill in RLAmt and set NeedMask.
2389 computeRotationAmounts();
2392 return Select(N
, false);
2394 // We currently have two techniques for handling results with zeros: early
2395 // masking (the default) and late masking. Late masking is sometimes more
2396 // efficient, but because the structure of the bit groups is different, it
2397 // is hard to tell without generating both and comparing the results. With
2398 // late masking, we ignore zeros in the resulting value when inserting each
2399 // set of bit groups, and then mask in the zeros at the end. With early
2400 // masking, we only insert the non-zero parts of the result at every step.
2402 unsigned InstCnt
= 0, InstCntLateMask
= 0;
2403 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2404 SDNode
*RN
= Select(N
, false, &InstCnt
);
2405 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt
<< " instructions\n");
2407 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2408 SDNode
*RNLM
= Select(N
, true, &InstCntLateMask
);
2409 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2410 << " instructions\n");
2412 if (InstCnt
<= InstCntLateMask
) {
2413 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2417 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2422 class IntegerCompareEliminator
{
2423 SelectionDAG
*CurDAG
;
2425 // Conversion type for interpreting results of a 32-bit instruction as
2426 // a 64-bit value or vice versa.
2427 enum ExtOrTruncConversion
{ Ext
, Trunc
};
2429 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2431 // ZExtOrig - use the original condition code, zero-extend value
2432 // ZExtInvert - invert the condition code, zero-extend value
2433 // SExtOrig - use the original condition code, sign-extend value
2434 // SExtInvert - invert the condition code, sign-extend value
2435 enum SetccInGPROpts
{ ZExtOrig
, ZExtInvert
, SExtOrig
, SExtInvert
};
2437 // Comparisons against zero to emit GPR code sequences for. Each of these
2438 // sequences may need to be emitted for two or more equivalent patterns.
2439 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2440 // matters as well as the extension type: sext (-1/0), zext (1/0).
2441 // GEZExt - (zext (LHS >= 0))
2442 // GESExt - (sext (LHS >= 0))
2443 // LEZExt - (zext (LHS <= 0))
2444 // LESExt - (sext (LHS <= 0))
2445 enum ZeroCompare
{ GEZExt
, GESExt
, LEZExt
, LESExt
};
2447 SDNode
*tryEXTEND(SDNode
*N
);
2448 SDNode
*tryLogicOpOfCompares(SDNode
*N
);
2449 SDValue
computeLogicOpInGPR(SDValue LogicOp
);
2450 SDValue
signExtendInputIfNeeded(SDValue Input
);
2451 SDValue
zeroExtendInputIfNeeded(SDValue Input
);
2452 SDValue
addExtOrTrunc(SDValue NatWidthRes
, ExtOrTruncConversion Conv
);
2453 SDValue
getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
2455 SDValue
get32BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2456 int64_t RHSValue
, SDLoc dl
);
2457 SDValue
get32BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2458 int64_t RHSValue
, SDLoc dl
);
2459 SDValue
get64BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2460 int64_t RHSValue
, SDLoc dl
);
2461 SDValue
get64BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2462 int64_t RHSValue
, SDLoc dl
);
2463 SDValue
getSETCCInGPR(SDValue Compare
, SetccInGPROpts ConvOpts
);
2466 IntegerCompareEliminator(SelectionDAG
*DAG
,
2467 PPCDAGToDAGISel
*Sel
) : CurDAG(DAG
), S(Sel
) {
2468 assert(CurDAG
->getTargetLoweringInfo()
2469 .getPointerTy(CurDAG
->getDataLayout()).getSizeInBits() == 64 &&
2470 "Only expecting to use this on 64 bit targets.");
2472 SDNode
*Select(SDNode
*N
) {
2473 if (CmpInGPR
== ICGPR_None
)
2475 switch (N
->getOpcode()) {
2477 case ISD::ZERO_EXTEND
:
2478 if (CmpInGPR
== ICGPR_Sext
|| CmpInGPR
== ICGPR_SextI32
||
2479 CmpInGPR
== ICGPR_SextI64
)
2482 case ISD::SIGN_EXTEND
:
2483 if (CmpInGPR
== ICGPR_Zext
|| CmpInGPR
== ICGPR_ZextI32
||
2484 CmpInGPR
== ICGPR_ZextI64
)
2486 return tryEXTEND(N
);
2490 return tryLogicOpOfCompares(N
);
// Returns true when Opc is one of the bitwise logic opcodes ISD::AND,
// ISD::OR or ISD::XOR, and false for any other opcode.
2496 static bool isLogicOp(unsigned Opc
) {
2497 return Opc
== ISD::AND
|| Opc
== ISD::OR
|| Opc
== ISD::XOR
;
2499 // The obvious case for wanting to keep the value in a GPR. Namely, the
2500 // result of the comparison is actually needed in a GPR.
2501 SDNode
*IntegerCompareEliminator::tryEXTEND(SDNode
*N
) {
2502 assert((N
->getOpcode() == ISD::ZERO_EXTEND
||
2503 N
->getOpcode() == ISD::SIGN_EXTEND
) &&
2504 "Expecting a zero/sign extend node!");
2506 // If we are zero-extending the result of a logical operation on i1
2507 // values, we can keep the values in GPRs.
2508 if (isLogicOp(N
->getOperand(0).getOpcode()) &&
2509 N
->getOperand(0).getValueType() == MVT::i1
&&
2510 N
->getOpcode() == ISD::ZERO_EXTEND
)
2511 WideRes
= computeLogicOpInGPR(N
->getOperand(0));
2512 else if (N
->getOperand(0).getOpcode() != ISD::SETCC
)
2516 getSETCCInGPR(N
->getOperand(0),
2517 N
->getOpcode() == ISD::SIGN_EXTEND
?
2518 SetccInGPROpts::SExtOrig
: SetccInGPROpts::ZExtOrig
);
2524 bool Input32Bit
= WideRes
.getValueType() == MVT::i32
;
2525 bool Output32Bit
= N
->getValueType(0) == MVT::i32
;
2527 NumSextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 1 : 0;
2528 NumZextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 0 : 1;
2530 SDValue ConvOp
= WideRes
;
2531 if (Input32Bit
!= Output32Bit
)
2532 ConvOp
= addExtOrTrunc(WideRes
, Input32Bit
? ExtOrTruncConversion::Ext
:
2533 ExtOrTruncConversion::Trunc
);
2534 return ConvOp
.getNode();
2537 // Attempt to perform logical operations on the results of comparisons while
2538 // keeping the values in GPRs. Without doing so, these would end up being
2539 // lowered to CR-logical operations which suffer from significant latency and
2541 SDNode
*IntegerCompareEliminator::tryLogicOpOfCompares(SDNode
*N
) {
2542 if (N
->getValueType(0) != MVT::i1
)
2544 assert(isLogicOp(N
->getOpcode()) &&
2545 "Expected a logic operation on setcc results.");
2546 SDValue LoweredLogical
= computeLogicOpInGPR(SDValue(N
, 0));
2547 if (!LoweredLogical
)
2551 bool IsBitwiseNegate
= LoweredLogical
.getMachineOpcode() == PPC::XORI8
;
2552 unsigned SubRegToExtract
= IsBitwiseNegate
? PPC::sub_eq
: PPC::sub_gt
;
2553 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
2554 SDValue LHS
= LoweredLogical
.getOperand(0);
2555 SDValue RHS
= LoweredLogical
.getOperand(1);
2557 SDValue OpToConvToRecForm
;
2559 // Look through any 32-bit to 64-bit implicit extend nodes to find the
2560 // opcode that is input to the XORI.
2561 if (IsBitwiseNegate
&&
2562 LoweredLogical
.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG
)
2563 OpToConvToRecForm
= LoweredLogical
.getOperand(0).getOperand(1);
2564 else if (IsBitwiseNegate
)
2565 // If the input to the XORI isn't an extension, that's what we're after.
2566 OpToConvToRecForm
= LoweredLogical
.getOperand(0);
2568 // If this is not an XORI, it is a reg-reg logical op and we can convert
2569 // it to record-form.
2570 OpToConvToRecForm
= LoweredLogical
;
2572 // Get the record-form version of the node we're looking to use to get the
2574 uint16_t NonRecOpc
= OpToConvToRecForm
.getMachineOpcode();
2575 int NewOpc
= PPCInstrInfo::getRecordFormOpcode(NonRecOpc
);
2577 // Convert the right node to record-form. This is either the logical we're
2578 // looking at or it is the input node to the negation (if we're looking at
2579 // a bitwise negation).
2580 if (NewOpc
!= -1 && IsBitwiseNegate
) {
2581 // The input to the XORI has a record-form. Use it.
2582 assert(LoweredLogical
.getConstantOperandVal(1) == 1 &&
2583 "Expected a PPC::XORI8 only for bitwise negation.");
2584 // Emit the record-form instruction.
2585 std::vector
<SDValue
> Ops
;
2586 for (int i
= 0, e
= OpToConvToRecForm
.getNumOperands(); i
< e
; i
++)
2587 Ops
.push_back(OpToConvToRecForm
.getOperand(i
));
2590 SDValue(CurDAG
->getMachineNode(NewOpc
, dl
,
2591 OpToConvToRecForm
.getValueType(),
2592 MVT::Glue
, Ops
), 0);
2594 assert((NewOpc
!= -1 || !IsBitwiseNegate
) &&
2595 "No record form available for AND8/OR8/XOR8?");
2597 SDValue(CurDAG
->getMachineNode(NewOpc
== -1 ? PPC::ANDIo8
: NewOpc
, dl
,
2598 MVT::i64
, MVT::Glue
, LHS
, RHS
), 0);
2601 // Select this node to a single bit from CR0 set by the record-form node
2602 // just created. For bitwise negation, use the EQ bit which is the equivalent
2603 // of negating the result (i.e. it is a bit set when the result of the
2604 // operation is zero).
2606 CurDAG
->getTargetConstant(SubRegToExtract
, dl
, MVT::i32
);
2608 SDValue(CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
, dl
,
2609 MVT::i1
, CR0Reg
, SRIdxVal
,
2610 WideOp
.getValue(1)), 0);
2611 return CRBit
.getNode();
2614 // Lower a logical operation on i1 values into a GPR sequence if possible.
2615 // The result can be kept in a GPR if requested.
2616 // Three types of inputs can be handled:
2619 // - Logical operation (AND/OR/XOR)
2620 // There is also a special case that is handled (namely a complement operation
2621 // achieved with xor %a, -1).
2622 SDValue
IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp
) {
2623 assert(isLogicOp(LogicOp
.getOpcode()) &&
2624 "Can only handle logic operations here.");
2625 assert(LogicOp
.getValueType() == MVT::i1
&&
2626 "Can only handle logic operations on i1 values here.");
2630 // Special case: xor %a, -1
2631 bool IsBitwiseNegation
= isBitwiseNot(LogicOp
);
2633 // Produces a GPR sequence for each operand of the binary logic operation.
2634 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2635 // the value in a GPR and for logic operations, it will recursively produce
2636 // a GPR sequence for the operation.
2637 auto getLogicOperand
= [&] (SDValue Operand
) -> SDValue
{
2638 unsigned OperandOpcode
= Operand
.getOpcode();
2639 if (OperandOpcode
== ISD::SETCC
)
2640 return getSETCCInGPR(Operand
, SetccInGPROpts::ZExtOrig
);
2641 else if (OperandOpcode
== ISD::TRUNCATE
) {
2642 SDValue InputOp
= Operand
.getOperand(0);
2643 EVT InVT
= InputOp
.getValueType();
2644 return SDValue(CurDAG
->getMachineNode(InVT
== MVT::i32
? PPC::RLDICL_32
:
2645 PPC::RLDICL
, dl
, InVT
, InputOp
,
2646 S
->getI64Imm(0, dl
),
2647 S
->getI64Imm(63, dl
)), 0);
2648 } else if (isLogicOp(OperandOpcode
))
2649 return computeLogicOpInGPR(Operand
);
2652 LHS
= getLogicOperand(LogicOp
.getOperand(0));
2653 RHS
= getLogicOperand(LogicOp
.getOperand(1));
2655 // If a GPR sequence can't be produced for the LHS we can't proceed.
2656 // Not producing a GPR sequence for the RHS is only a problem if this isn't
2657 // a bitwise negation operation.
2658 if (!LHS
|| (!RHS
&& !IsBitwiseNegation
))
2661 NumLogicOpsOnComparison
++;
2663 // We will use the inputs as 64-bit values.
2664 if (LHS
.getValueType() == MVT::i32
)
2665 LHS
= addExtOrTrunc(LHS
, ExtOrTruncConversion::Ext
);
2666 if (!IsBitwiseNegation
&& RHS
.getValueType() == MVT::i32
)
2667 RHS
= addExtOrTrunc(RHS
, ExtOrTruncConversion::Ext
);
2670 switch (LogicOp
.getOpcode()) {
2671 default: llvm_unreachable("Unknown logic operation.");
2672 case ISD::AND
: NewOpc
= PPC::AND8
; break;
2673 case ISD::OR
: NewOpc
= PPC::OR8
; break;
2674 case ISD::XOR
: NewOpc
= PPC::XOR8
; break;
2677 if (IsBitwiseNegation
) {
2678 RHS
= S
->getI64Imm(1, dl
);
2679 NewOpc
= PPC::XORI8
;
2682 return SDValue(CurDAG
->getMachineNode(NewOpc
, dl
, MVT::i64
, LHS
, RHS
), 0);
2686 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2687 /// Otherwise just reinterpret it as a 64-bit value.
2688 /// Useful when emitting comparison code for 32-bit values without using
2689 /// the compare instruction (which only considers the lower 32-bits).
2690 SDValue
IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input
) {
2691 assert(Input
.getValueType() == MVT::i32
&&
2692 "Can only sign-extend 32-bit values here.");
2693 unsigned Opc
= Input
.getOpcode();
2695 // The value was sign extended and then truncated to 32-bits. No need to
2696 // sign extend it again.
2697 if (Opc
== ISD::TRUNCATE
&&
2698 (Input
.getOperand(0).getOpcode() == ISD::AssertSext
||
2699 Input
.getOperand(0).getOpcode() == ISD::SIGN_EXTEND
))
2700 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2702 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
2703 // The input is a sign-extending load. All ppc sign-extending loads
2704 // sign-extend to the full 64-bits.
2705 if (InputLoad
&& InputLoad
->getExtensionType() == ISD::SEXTLOAD
)
2706 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2708 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
2709 // We don't sign-extend constants.
2711 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2714 SignExtensionsAdded
++;
2715 return SDValue(CurDAG
->getMachineNode(PPC::EXTSW_32_64
, dl
,
2716 MVT::i64
, Input
), 0);
2719 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2720 /// Otherwise just reinterpret it as a 64-bit value.
2721 /// Useful when emitting comparison code for 32-bit values without using
2722 /// the compare instruction (which only considers the lower 32-bits).
2723 SDValue
IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input
) {
2724 assert(Input
.getValueType() == MVT::i32
&&
2725 "Can only zero-extend 32-bit values here.");
2726 unsigned Opc
= Input
.getOpcode();
2728 // The only conditions under which we can omit the actual extend instruction:
2729 // - The value is a non-negative constant
2730 // - The value comes from a load that isn't a sign-extending load
2731 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2732 bool IsTruncateOfZExt
= Opc
== ISD::TRUNCATE
&&
2733 (Input
.getOperand(0).getOpcode() == ISD::AssertZext
||
2734 Input
.getOperand(0).getOpcode() == ISD::ZERO_EXTEND
);
2735 if (IsTruncateOfZExt
)
2736 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2738 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
2739 if (InputConst
&& InputConst
->getSExtValue() >= 0)
2740 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2742 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
2743 // The input is a load that doesn't sign-extend (it will be zero-extended).
2744 if (InputLoad
&& InputLoad
->getExtensionType() != ISD::SEXTLOAD
)
2745 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2747 // None of the above, need to zero-extend.
2749 ZeroExtensionsAdded
++;
2750 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL_32_64
, dl
, MVT::i64
, Input
,
2751 S
->getI64Imm(0, dl
),
2752 S
->getI64Imm(32, dl
)), 0);
2755 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2756 // course not actual zero/sign extensions that will generate machine code,
2757 // they're just a way to reinterpret a 32 bit value in a register as a
2758 // 64 bit value and vice-versa.
2759 SDValue
IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes
,
2760 ExtOrTruncConversion Conv
) {
2761 SDLoc
dl(NatWidthRes
);
2763 // For reinterpreting 32-bit values as 64 bit values, we generate
2764 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2765 if (Conv
== ExtOrTruncConversion::Ext
) {
2766 SDValue
ImDef(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
, MVT::i64
), 0);
2768 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
2769 return SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
, MVT::i64
,
2770 ImDef
, NatWidthRes
, SubRegIdx
), 0);
2773 assert(Conv
== ExtOrTruncConversion::Trunc
&&
2774 "Unknown convertion between 32 and 64 bit values.");
2775 // For reinterpreting 64-bit values as 32-bit values, we just need to
2776 // EXTRACT_SUBREG (i.e. extract the low word).
2778 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
2779 return SDValue(CurDAG
->getMachineNode(PPC::EXTRACT_SUBREG
, dl
, MVT::i32
,
2780 NatWidthRes
, SubRegIdx
), 0);
2783 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2784 // Handle both zero-extensions and sign-extensions.
2786 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
2787 ZeroCompare CmpTy
) {
2788 EVT InVT
= LHS
.getValueType();
2789 bool Is32Bit
= InVT
== MVT::i32
;
2792 // Produce the value that needs to be either zero or sign extended.
2794 case ZeroCompare::GEZExt
:
2795 case ZeroCompare::GESExt
:
2796 ToExtend
= SDValue(CurDAG
->getMachineNode(Is32Bit
? PPC::NOR
: PPC::NOR8
,
2797 dl
, InVT
, LHS
, LHS
), 0);
2799 case ZeroCompare::LEZExt
:
2800 case ZeroCompare::LESExt
: {
2802 // Upper 32 bits cannot be undefined for this sequence.
2803 LHS
= signExtendInputIfNeeded(LHS
);
2805 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
2807 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2808 Neg
, S
->getI64Imm(1, dl
),
2809 S
->getI64Imm(63, dl
)), 0);
2812 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
2813 S
->getI64Imm(~0ULL, dl
)), 0);
2814 ToExtend
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2821 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2823 (CmpTy
== ZeroCompare::GEZExt
|| CmpTy
== ZeroCompare::LEZExt
))
2824 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2825 ToExtend
, S
->getI64Imm(1, dl
),
2826 S
->getI64Imm(63, dl
)), 0);
2828 (CmpTy
== ZeroCompare::GESExt
|| CmpTy
== ZeroCompare::LESExt
))
2829 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, ToExtend
,
2830 S
->getI64Imm(63, dl
)), 0);
2832 assert(Is32Bit
&& "Should have handled the 32-bit sequences above.");
2833 // For 32-bit sequences, the extensions differ between GE/LE cases.
2835 case ZeroCompare::GEZExt
: {
2836 SDValue ShiftOps
[] = { ToExtend
, S
->getI32Imm(1, dl
), S
->getI32Imm(31, dl
),
2837 S
->getI32Imm(31, dl
) };
2838 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2841 case ZeroCompare::GESExt
:
2842 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, ToExtend
,
2843 S
->getI32Imm(31, dl
)), 0);
2844 case ZeroCompare::LEZExt
:
2845 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, ToExtend
,
2846 S
->getI32Imm(1, dl
)), 0);
2847 case ZeroCompare::LESExt
:
2848 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, ToExtend
,
2849 S
->getI32Imm(-1, dl
)), 0);
2852 // The switch above covers all the enumerators, so it has no default clause
2853 // (to avoid compiler warnings).
2854 llvm_unreachable("Unknown zero-comparison type.");
2857 /// Produces a zero-extended result of comparing two 32-bit values according to
2858 /// the passed condition code.
2860 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS
, SDValue RHS
,
2862 int64_t RHSValue
, SDLoc dl
) {
2863 if (CmpInGPR
== ICGPR_I64
|| CmpInGPR
== ICGPR_SextI64
||
2864 CmpInGPR
== ICGPR_ZextI64
|| CmpInGPR
== ICGPR_Sext
)
2866 bool IsRHSZero
= RHSValue
== 0;
2867 bool IsRHSOne
= RHSValue
== 1;
2868 bool IsRHSNegOne
= RHSValue
== -1LL;
2870 default: return SDValue();
2872 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2873 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
2874 SDValue Xor
= IsRHSZero
? LHS
:
2875 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2877 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2878 SDValue ShiftOps
[] = { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
),
2879 S
->getI32Imm(31, dl
) };
2880 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2884 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2885 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
2886 SDValue Xor
= IsRHSZero
? LHS
:
2887 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2889 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2890 SDValue ShiftOps
[] = { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
),
2891 S
->getI32Imm(31, dl
) };
2893 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
2894 return SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
2895 S
->getI32Imm(1, dl
)), 0);
2898 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2899 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
2901 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
2903 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2904 // by swapping inputs and falling through.
2905 std::swap(LHS
, RHS
);
2906 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2907 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2911 if (CmpInGPR
== ICGPR_NonExtIn
)
2913 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2914 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
2916 if (CmpInGPR
== ICGPR_NonExtIn
)
2918 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
2921 // The upper 32-bits of the register can't be undefined for this sequence.
2922 LHS
= signExtendInputIfNeeded(LHS
);
2923 RHS
= signExtendInputIfNeeded(RHS
);
2925 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
2927 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Sub
,
2928 S
->getI64Imm(1, dl
), S
->getI64Imm(63, dl
)),
2931 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
,
2932 MVT::i64
, Shift
, S
->getI32Imm(1, dl
)), 0);
2935 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2936 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2937 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
2938 // Handle SETGT -1 (which is equivalent to SETGE 0).
2940 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
2943 if (CmpInGPR
== ICGPR_NonExtIn
)
2945 // The upper 32-bits of the register can't be undefined for this sequence.
2946 LHS
= signExtendInputIfNeeded(LHS
);
2947 RHS
= signExtendInputIfNeeded(RHS
);
2949 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
2950 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2951 Neg
, S
->getI32Imm(1, dl
), S
->getI32Imm(63, dl
)), 0);
2953 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2954 // (%b < %a) by swapping inputs and falling through.
2955 std::swap(LHS
, RHS
);
2956 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2957 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2958 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
2962 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2963 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
2964 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
2965 // Handle SETLT 1 (which is equivalent to SETLE 0).
2967 if (CmpInGPR
== ICGPR_NonExtIn
)
2969 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
2973 SDValue ShiftOps
[] = { LHS
, S
->getI32Imm(1, dl
), S
->getI32Imm(31, dl
),
2974 S
->getI32Imm(31, dl
) };
2975 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2979 if (CmpInGPR
== ICGPR_NonExtIn
)
2981 // The upper 32-bits of the register can't be undefined for this sequence.
2982 LHS
= signExtendInputIfNeeded(LHS
);
2983 RHS
= signExtendInputIfNeeded(RHS
);
2985 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
2986 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2987 SUBFNode
, S
->getI64Imm(1, dl
),
2988 S
->getI64Imm(63, dl
)), 0);
2991 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %a, %b), 63), 1)
2992 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %b, %a), 63), 1)
2993 std::swap(LHS
, RHS
);
2996 if (CmpInGPR
== ICGPR_NonExtIn
)
2998 // The upper 32-bits of the register can't be undefined for this sequence.
2999 LHS
= zeroExtendInputIfNeeded(LHS
);
3000 RHS
= zeroExtendInputIfNeeded(RHS
);
3002 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3004 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3005 Subtract
, S
->getI64Imm(1, dl
),
3006 S
->getI64Imm(63, dl
)), 0);
3007 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, SrdiNode
,
3008 S
->getI32Imm(1, dl
)), 0);
3011 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3012 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3013 std::swap(LHS
, RHS
);
3016 if (CmpInGPR
== ICGPR_NonExtIn
)
3018 // The upper 32-bits of the register can't be undefined for this sequence.
3019 LHS
= zeroExtendInputIfNeeded(LHS
);
3020 RHS
= zeroExtendInputIfNeeded(RHS
);
3022 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3023 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3024 Subtract
, S
->getI64Imm(1, dl
),
3025 S
->getI64Imm(63, dl
)), 0);
3030 /// Produces a sign-extended result of comparing two 32-bit values according to
3031 /// the passed condition code.
3033 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS
, SDValue RHS
,
3035 int64_t RHSValue
, SDLoc dl
) {
3036 if (CmpInGPR
== ICGPR_I64
|| CmpInGPR
== ICGPR_SextI64
||
3037 CmpInGPR
== ICGPR_ZextI64
|| CmpInGPR
== ICGPR_Zext
)
3039 bool IsRHSZero
= RHSValue
== 0;
3040 bool IsRHSOne
= RHSValue
== 1;
3041 bool IsRHSNegOne
= RHSValue
== -1LL;
3044 default: return SDValue();
3046 // (sext (setcc %a, %b, seteq)) ->
3047 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3048 // (sext (setcc %a, 0, seteq)) ->
3049 // (ashr (shl (ctlz %a), 58), 63)
3050 SDValue CountInput
= IsRHSZero
? LHS
:
3051 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
3053 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, CountInput
), 0);
3054 SDValue SHLOps
[] = { Cntlzw
, S
->getI32Imm(27, dl
),
3055 S
->getI32Imm(5, dl
), S
->getI32Imm(31, dl
) };
3057 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, SHLOps
), 0);
3058 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Slwi
), 0);
3061 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3062 // flip the bit, finally take 2's complement.
3063 // (sext (setcc %a, %b, setne)) ->
3064 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3065 // Same as above, but the first xor is not needed.
3066 // (sext (setcc %a, 0, setne)) ->
3067 // (neg (xor (lshr (ctlz %a), 5), 1))
3068 SDValue Xor
= IsRHSZero
? LHS
:
3069 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
3071 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
3072 SDValue ShiftOps
[] =
3073 { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
), S
->getI32Imm(31, dl
) };
3075 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
3077 SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
3078 S
->getI32Imm(1, dl
)), 0);
3079 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Xori
), 0);
3082 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3083 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3085 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3087 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3088 // by swapping inputs and falling through.
3089 std::swap(LHS
, RHS
);
3090 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3091 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3095 if (CmpInGPR
== ICGPR_NonExtIn
)
3097 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3098 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3100 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3102 // The upper 32-bits of the register can't be undefined for this sequence.
3103 LHS
= signExtendInputIfNeeded(LHS
);
3104 RHS
= signExtendInputIfNeeded(RHS
);
3106 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, MVT::Glue
,
3109 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3110 SUBFNode
, S
->getI64Imm(1, dl
),
3111 S
->getI64Imm(63, dl
)), 0);
3112 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Srdi
,
3113 S
->getI32Imm(-1, dl
)), 0);
3116 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3117 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3118 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3120 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3122 if (CmpInGPR
== ICGPR_NonExtIn
)
3124 // The upper 32-bits of the register can't be undefined for this sequence.
3125 LHS
= signExtendInputIfNeeded(LHS
);
3126 RHS
= signExtendInputIfNeeded(RHS
);
3128 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
3129 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, Neg
,
3130 S
->getI64Imm(63, dl
)), 0);
3132 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3133 // (%b < %a) by swapping inputs and falling through.
3134 std::swap(LHS
, RHS
);
3135 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3136 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3137 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3141 // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3142 // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3143 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
3145 if (CmpInGPR
== ICGPR_NonExtIn
)
3147 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3150 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, LHS
,
3151 S
->getI32Imm(31, dl
)), 0);
3153 if (CmpInGPR
== ICGPR_NonExtIn
)
3155 // The upper 32-bits of the register can't be undefined for this sequence.
3156 LHS
= signExtendInputIfNeeded(LHS
);
3157 RHS
= signExtendInputIfNeeded(RHS
);
3159 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3160 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3161 SUBFNode
, S
->getI64Imm(63, dl
)), 0);
3164 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3165 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3166 std::swap(LHS
, RHS
);
3169 if (CmpInGPR
== ICGPR_NonExtIn
)
3171 // The upper 32-bits of the register can't be undefined for this sequence.
3172 LHS
= zeroExtendInputIfNeeded(LHS
);
3173 RHS
= zeroExtendInputIfNeeded(RHS
);
3175 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3177 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Subtract
,
3178 S
->getI32Imm(1, dl
), S
->getI32Imm(63,dl
)),
3180 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Shift
,
3181 S
->getI32Imm(-1, dl
)), 0);
3184 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3185 // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3186 std::swap(LHS
, RHS
);
3189 if (CmpInGPR
== ICGPR_NonExtIn
)
3191 // The upper 32-bits of the register can't be undefined for this sequence.
3192 LHS
= zeroExtendInputIfNeeded(LHS
);
3193 RHS
= zeroExtendInputIfNeeded(RHS
);
3195 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3196 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3197 Subtract
, S
->getI64Imm(63, dl
)), 0);
3202 /// Produces a zero-extended result of comparing two 64-bit values according to
3203 /// the passed condition code.
3205 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS
, SDValue RHS
,
3207 int64_t RHSValue
, SDLoc dl
) {
3208 if (CmpInGPR
== ICGPR_I32
|| CmpInGPR
== ICGPR_SextI32
||
3209 CmpInGPR
== ICGPR_ZextI32
|| CmpInGPR
== ICGPR_Sext
)
3211 bool IsRHSZero
= RHSValue
== 0;
3212 bool IsRHSOne
= RHSValue
== 1;
3213 bool IsRHSNegOne
= RHSValue
== -1LL;
3215 default: return SDValue();
3217 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3218 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3219 SDValue Xor
= IsRHSZero
? LHS
:
3220 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3222 SDValue(CurDAG
->getMachineNode(PPC::CNTLZD
, dl
, MVT::i64
, Xor
), 0);
3223 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Clz
,
3224 S
->getI64Imm(58, dl
),
3225 S
->getI64Imm(63, dl
)), 0);
3228 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3229 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3230 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3231 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3232 SDValue Xor
= IsRHSZero
? LHS
:
3233 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3235 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3236 Xor
, S
->getI32Imm(~0U, dl
)), 0);
3237 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, AC
,
3238 Xor
, AC
.getValue(1)), 0);
3241 // {subc.reg, subc.CA} = (subcarry %a, %b)
3242 // (zext (setcc %a, %b, setge)) ->
3243 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3244 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3246 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3247 std::swap(LHS
, RHS
);
3248 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3249 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3253 // {subc.reg, subc.CA} = (subcarry %b, %a)
3254 // (zext (setcc %a, %b, setle)) ->
3255 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3256 // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3258 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3260 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3261 S
->getI64Imm(1, dl
),
3262 S
->getI64Imm(63, dl
)), 0);
3264 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, RHS
,
3265 S
->getI64Imm(63, dl
)), 0);
3266 SDValue SubtractCarry
=
3267 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3269 return SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3270 ShiftR
, ShiftL
, SubtractCarry
), 0);
3273 // {subc.reg, subc.CA} = (subcarry %b, %a)
3274 // (zext (setcc %a, %b, setgt)) ->
3275 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3276 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3278 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3281 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3282 S
->getI64Imm(~0ULL, dl
)), 0);
3284 SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
, Addi
, LHS
), 0);
3285 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Nor
,
3286 S
->getI64Imm(1, dl
),
3287 S
->getI64Imm(63, dl
)), 0);
3289 std::swap(LHS
, RHS
);
3290 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3291 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3292 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3296 // {subc.reg, subc.CA} = (subcarry %a, %b)
3297 // (zext (setcc %a, %b, setlt)) ->
3298 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3299 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3301 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3303 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3304 S
->getI64Imm(1, dl
),
3305 S
->getI64Imm(63, dl
)), 0);
3307 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3308 LHS
, S
->getI64Imm(63, dl
)), 0);
3310 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3311 RHS
, S
->getI64Imm(1, dl
),
3312 S
->getI64Imm(63, dl
)), 0);
3313 SDValue SUBFC8Carry
=
3314 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3317 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3318 SRDINode
, SRADINode
, SUBFC8Carry
), 0);
3319 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3320 ADDE8Node
, S
->getI64Imm(1, dl
)), 0);
3323 // {subc.reg, subc.CA} = (subcarry %a, %b)
3324 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3325 std::swap(LHS
, RHS
);
3328 // {subc.reg, subc.CA} = (subcarry %b, %a)
3329 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3330 SDValue SUBFC8Carry
=
3331 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3333 SDValue SUBFE8Node
=
3334 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, MVT::Glue
,
3335 LHS
, LHS
, SUBFC8Carry
), 0);
3336 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
,
3337 SUBFE8Node
, S
->getI64Imm(1, dl
)), 0);
3340 // {subc.reg, subc.CA} = (subcarry %b, %a)
3341 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3342 std::swap(LHS
, RHS
);
3345 // {subc.reg, subc.CA} = (subcarry %a, %b)
3346 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3347 SDValue SubtractCarry
=
3348 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3351 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
,
3352 LHS
, LHS
, SubtractCarry
), 0);
3353 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
,
3359 /// Produces a sign-extended result of comparing two 64-bit values according to
3360 /// the passed condition code.
3362 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS
, SDValue RHS
,
3364 int64_t RHSValue
, SDLoc dl
) {
3365 if (CmpInGPR
== ICGPR_I32
|| CmpInGPR
== ICGPR_SextI32
||
3366 CmpInGPR
== ICGPR_ZextI32
|| CmpInGPR
== ICGPR_Zext
)
3368 bool IsRHSZero
= RHSValue
== 0;
3369 bool IsRHSOne
= RHSValue
== 1;
3370 bool IsRHSNegOne
= RHSValue
== -1LL;
3372 default: return SDValue();
3374 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3375 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3376 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3377 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3378 SDValue AddInput
= IsRHSZero
? LHS
:
3379 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3381 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3382 AddInput
, S
->getI32Imm(~0U, dl
)), 0);
3383 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, Addic
,
3384 Addic
, Addic
.getValue(1)), 0);
3387 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3388 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3389 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3390 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3391 SDValue Xor
= IsRHSZero
? LHS
:
3392 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3394 SDValue(CurDAG
->getMachineNode(PPC::SUBFIC8
, dl
, MVT::i64
, MVT::Glue
,
3395 Xor
, S
->getI32Imm(0, dl
)), 0);
3396 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, SC
,
3397 SC
, SC
.getValue(1)), 0);
3400 // {subc.reg, subc.CA} = (subcarry %a, %b)
3401 // (sext (setcc %a, %b, setge)) ->
3402 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3403 // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3405 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3406 std::swap(LHS
, RHS
);
3407 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3408 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3412 // {subc.reg, subc.CA} = (subcarry %b, %a)
3413 // (sext (setcc %a, %b, setle)) ->
3414 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3415 // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3417 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3419 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, RHS
,
3420 S
->getI64Imm(63, dl
)), 0);
3422 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3423 S
->getI64Imm(1, dl
),
3424 S
->getI64Imm(63, dl
)), 0);
3425 SDValue SubtractCarry
=
3426 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3429 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3430 ShiftR
, ShiftL
, SubtractCarry
), 0);
3431 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, Adde
), 0);
3434 // {subc.reg, subc.CA} = (subcarry %b, %a)
3435 // (sext (setcc %a, %b, setgt)) ->
3436 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3437 // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3439 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3442 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3443 S
->getI64Imm(-1, dl
)), 0);
3445 SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
, Add
, LHS
), 0);
3446 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, Nor
,
3447 S
->getI64Imm(63, dl
)), 0);
3449 std::swap(LHS
, RHS
);
3450 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3451 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3452 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3456 // {subc.reg, subc.CA} = (subcarry %a, %b)
3457 // (sext (setcc %a, %b, setlt)) ->
3458 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3459 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3461 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3463 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, LHS
,
3464 S
->getI64Imm(63, dl
)), 0);
3467 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3468 LHS
, S
->getI64Imm(63, dl
)), 0);
3470 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3471 RHS
, S
->getI64Imm(1, dl
),
3472 S
->getI64Imm(63, dl
)), 0);
3473 SDValue SUBFC8Carry
=
3474 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3477 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
,
3478 SRDINode
, SRADINode
, SUBFC8Carry
), 0);
3480 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3481 ADDE8Node
, S
->getI64Imm(1, dl
)), 0);
3482 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
,
3486 // {subc.reg, subc.CA} = (subcarry %a, %b)
3487 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3488 std::swap(LHS
, RHS
);
3491 // {subc.reg, subc.CA} = (subcarry %b, %a)
3492 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3493 SDValue SubtractCarry
=
3494 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3497 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, MVT::Glue
, LHS
,
3498 LHS
, SubtractCarry
), 0);
3499 return SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
,
3500 ExtSub
, ExtSub
), 0);
3503 // {subc.reg, subc.CA} = (subcarry %b, %a)
3504 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3505 std::swap(LHS
, RHS
);
3508 // {subc.reg, subc.CA} = (subcarry %a, %b)
3509 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3511 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3513 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
,
3514 LHS
, LHS
, SubCarry
), 0);
3519 /// Do all uses of this SDValue need the result in a GPR?
3520 /// This is meant to be used on values that have type i1 since
3521 /// it is somewhat meaningless to ask if values of other types
3522 /// should be kept in GPR's.
3523 static bool allUsesExtend(SDValue Compare
, SelectionDAG
*CurDAG
) {
3524 assert(Compare
.getOpcode() == ISD::SETCC
&&
3525 "An ISD::SETCC node required here.");
3527 // For values that have a single use, the caller should obviously already have
3528 // checked if that use is an extending use. We check the other uses here.
3529 if (Compare
.hasOneUse())
3531 // We want the value in a GPR if it is being extended, used for a select, or
3532 // used in logical operations.
3533 for (auto CompareUse
: Compare
.getNode()->uses())
3534 if (CompareUse
->getOpcode() != ISD::SIGN_EXTEND
&&
3535 CompareUse
->getOpcode() != ISD::ZERO_EXTEND
&&
3536 CompareUse
->getOpcode() != ISD::SELECT
&&
3537 !isLogicOp(CompareUse
->getOpcode())) {
3538 OmittedForNonExtendUses
++;
3544 /// Returns an equivalent of a SETCC node but with the result the same width as
3545 /// the inputs. This can also be used for SELECT_CC if one of the true/false
3546 /// values is a power of two while the other is zero.
3547 SDValue
IntegerCompareEliminator::getSETCCInGPR(SDValue Compare
,
3548 SetccInGPROpts ConvOpts
) {
3549 assert((Compare
.getOpcode() == ISD::SETCC
||
3550 Compare
.getOpcode() == ISD::SELECT_CC
) &&
3551 "An ISD::SETCC node required here.");
3553 // Don't convert this comparison to a GPR sequence because there are uses
3554 // of the i1 result (i.e. uses that require the result in the CR).
3555 if ((Compare
.getOpcode() == ISD::SETCC
) && !allUsesExtend(Compare
, CurDAG
))
3558 SDValue LHS
= Compare
.getOperand(0);
3559 SDValue RHS
= Compare
.getOperand(1);
3561 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3562 int CCOpNum
= Compare
.getOpcode() == ISD::SELECT_CC
? 4 : 2;
3564 cast
<CondCodeSDNode
>(Compare
.getOperand(CCOpNum
))->get();
3565 EVT InputVT
= LHS
.getValueType();
3566 if (InputVT
!= MVT::i32
&& InputVT
!= MVT::i64
)
3569 if (ConvOpts
== SetccInGPROpts::ZExtInvert
||
3570 ConvOpts
== SetccInGPROpts::SExtInvert
)
3571 CC
= ISD::getSetCCInverse(CC
, true);
3573 bool Inputs32Bit
= InputVT
== MVT::i32
;
3576 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3577 int64_t RHSValue
= RHSConst
? RHSConst
->getSExtValue() : INT64_MAX
;
3578 bool IsSext
= ConvOpts
== SetccInGPROpts::SExtOrig
||
3579 ConvOpts
== SetccInGPROpts::SExtInvert
;
3581 if (IsSext
&& Inputs32Bit
)
3582 return get32BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3583 else if (Inputs32Bit
)
3584 return get32BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3586 return get64BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3587 return get64BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3590 } // end anonymous namespace
3592 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode
*N
) {
3593 if (N
->getValueType(0) != MVT::i32
&&
3594 N
->getValueType(0) != MVT::i64
)
3597 // This optimization will emit code that assumes 64-bit registers
3598 // so we don't want to run it in 32-bit mode. Also don't run it
3599 // on functions that are not to be optimized.
3600 if (TM
.getOptLevel() == CodeGenOpt::None
|| !TM
.isPPC64())
3603 switch (N
->getOpcode()) {
3605 case ISD::ZERO_EXTEND
:
3606 case ISD::SIGN_EXTEND
:
3610 IntegerCompareEliminator
ICmpElim(CurDAG
, this);
3611 if (SDNode
*New
= ICmpElim
.Select(N
)) {
3612 ReplaceNode(N
, New
);
3620 bool PPCDAGToDAGISel::tryBitPermutation(SDNode
*N
) {
3621 if (N
->getValueType(0) != MVT::i32
&&
3622 N
->getValueType(0) != MVT::i64
)
3625 if (!UseBitPermRewriter
)
3628 switch (N
->getOpcode()) {
3635 BitPermutationSelector
BPS(CurDAG
);
3636 if (SDNode
*New
= BPS
.Select(N
)) {
3637 ReplaceNode(N
, New
);
3647 /// SelectCC - Select a comparison of the specified values with the specified
3648 /// condition code, returning the CR# of the expression.
3649 SDValue
PPCDAGToDAGISel::SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
3651 // Always select the LHS.
3654 if (LHS
.getValueType() == MVT::i32
) {
3656 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
3657 if (isInt32Immediate(RHS
, Imm
)) {
3658 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3659 if (isUInt
<16>(Imm
))
3660 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
3661 getI32Imm(Imm
& 0xFFFF, dl
)),
3663 // If this is a 16-bit signed immediate, fold it.
3664 if (isInt
<16>((int)Imm
))
3665 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
3666 getI32Imm(Imm
& 0xFFFF, dl
)),
3669 // For non-equality comparisons, the default code would materialize the
3670 // constant, then compare against it, like this:
3672 // ori r2, r2, 22136
3674 // Since we are just comparing for equality, we can emit this instead:
3675 // xoris r0,r3,0x1234
3676 // cmplwi cr0,r0,0x5678
3678 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS
, dl
, MVT::i32
, LHS
,
3679 getI32Imm(Imm
>> 16, dl
)), 0);
3680 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, Xor
,
3681 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
3684 } else if (ISD::isUnsignedIntSetCC(CC
)) {
3685 if (isInt32Immediate(RHS
, Imm
) && isUInt
<16>(Imm
))
3686 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
3687 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
3691 if (isIntS16Immediate(RHS
, SImm
))
3692 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
3693 getI32Imm((int)SImm
& 0xFFFF,
3698 } else if (LHS
.getValueType() == MVT::i64
) {
3700 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
3701 if (isInt64Immediate(RHS
.getNode(), Imm
)) {
3702 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3703 if (isUInt
<16>(Imm
))
3704 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
3705 getI32Imm(Imm
& 0xFFFF, dl
)),
3707 // If this is a 16-bit signed immediate, fold it.
3709 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
3710 getI32Imm(Imm
& 0xFFFF, dl
)),
3713 // For non-equality comparisons, the default code would materialize the
3714 // constant, then compare against it, like this:
3716 // ori r2, r2, 22136
3718 // Since we are just comparing for equality, we can emit this instead:
3719 // xoris r0,r3,0x1234
3720 // cmpldi cr0,r0,0x5678
3722 if (isUInt
<32>(Imm
)) {
3723 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS8
, dl
, MVT::i64
, LHS
,
3724 getI64Imm(Imm
>> 16, dl
)), 0);
3725 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, Xor
,
3726 getI64Imm(Imm
& 0xFFFF, dl
)),
3731 } else if (ISD::isUnsignedIntSetCC(CC
)) {
3732 if (isInt64Immediate(RHS
.getNode(), Imm
) && isUInt
<16>(Imm
))
3733 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
3734 getI64Imm(Imm
& 0xFFFF, dl
)), 0);
3738 if (isIntS16Immediate(RHS
, SImm
))
3739 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
3740 getI64Imm(SImm
& 0xFFFF, dl
)),
3744 } else if (LHS
.getValueType() == MVT::f32
) {
3745 if (PPCSubTarget
->hasSPE()) {
3750 Opc
= PPC::EFSCMPEQ
;
3758 Opc
= PPC::EFSCMPLT
;
3766 Opc
= PPC::EFSCMPGT
;
3771 } else if (LHS
.getValueType() == MVT::f64
) {
3772 if (PPCSubTarget
->hasSPE()) {
3777 Opc
= PPC::EFDCMPEQ
;
3785 Opc
= PPC::EFDCMPLT
;
3793 Opc
= PPC::EFDCMPGT
;
3797 Opc
= PPCSubTarget
->hasVSX() ? PPC::XSCMPUDP
: PPC::FCMPUD
;
3799 assert(LHS
.getValueType() == MVT::f128
&& "Unknown vt!");
3800 assert(PPCSubTarget
->hasVSX() && "__float128 requires VSX");
3801 Opc
= PPC::XSCMPUQP
;
3803 return SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, LHS
, RHS
), 0);
/// getPredicateForSetCC - Map an ISD condition code onto the PPC::Predicate
/// that tests the same relation. Condition codes that have no mapping end in
/// one of the two llvm_unreachable calls below.
3806 static PPC::Predicate
getPredicateForSetCC(ISD::CondCode CC
) {
3812 llvm_unreachable("Should be lowered by legalize!");
3813 default: llvm_unreachable("Unknown condition!");
3815 case ISD::SETEQ
: return PPC::PRED_EQ
;
3817 case ISD::SETNE
: return PPC::PRED_NE
;
3819 case ISD::SETLT
: return PPC::PRED_LT
;
3821 case ISD::SETLE
: return PPC::PRED_LE
;
3823 case ISD::SETGT
: return PPC::PRED_GT
;
3825 case ISD::SETGE
: return PPC::PRED_GE
;
// The ordered / unordered tests select PRED_NU / PRED_UN respectively.
3826 case ISD::SETO
: return PPC::PRED_NU
;
3827 case ISD::SETUO
: return PPC::PRED_UN
;
3828 // These two are invalid for floating point. Assume we have int, so the
// unsigned codes reuse the same predicates as SETLT / SETGT.
3829 case ISD::SETULT
: return PPC::PRED_LT
;
3830 case ISD::SETUGT
: return PPC::PRED_GT
;
3834 /// getCRIdxForSetCC - Return the index (0-3) of the condition register bit
3835 /// associated with the SetCC condition, and whether or not that bit is
3836 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
/// \p Invert is set to true by the SETGE, SETLE, SETNE and SETO cases, which
/// are expressed as the negation of bits 0, 1, 2 and 3 respectively.
3837 static unsigned getCRIdxForSetCC(ISD::CondCode CC
, bool &Invert
) {
3840 default: llvm_unreachable("Unknown condition!");
3842 case ISD::SETLT
: return 0; // Bit #0 = SETOLT
3844 case ISD::SETGT
: return 1; // Bit #1 = SETOGT
3846 case ISD::SETEQ
: return 2; // Bit #2 = SETOEQ
3847 case ISD::SETUO
: return 3; // Bit #3 = SETUO
3849 case ISD::SETGE
: Invert
= true; return 0; // !Bit #0 = SETUGE
3851 case ISD::SETLE
: Invert
= true; return 1; // !Bit #1 = SETULE
3853 case ISD::SETNE
: Invert
= true; return 2; // !Bit #2 = SETUNE
3854 case ISD::SETO
: Invert
= true; return 3; // !Bit #3 = SETO
3859 llvm_unreachable("Invalid branch code: should be expanded by legalize");
3860 // These are invalid for floating point. Assume integer, so the unsigned
// codes use the same bits as SETLT / SETGT.
3861 case ISD::SETULT
: return 0;
3862 case ISD::SETUGT
: return 1;
3866 // getVCmpInst: return the vector compare instruction for the specified
3867 // vector type and condition code. Since this is for altivec specific code,
3868 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
// v2f64 is accepted as well and always maps to the XVCMP*DP opcodes; for
// v4f32, HasVSX picks the XVCMP*SP opcode over the VCMP*FP one.
//
// Only "equal", "greater than" and (floating point only) "greater or equal"
// compares are returned directly. Other condition codes are first rewritten:
// Swap is set to true when the operands have to be exchanged, and Negate is
// set to true when the result of the returned instruction has to be inverted.
3869 static unsigned int getVCmpInst(MVT VecVT
, ISD::CondCode CC
,
3870 bool HasVSX
, bool &Swap
, bool &Negate
) {
// Floating-point element types.
3874 if (VecVT
.isFloatingPoint()) {
3875 /* Handle some cases by swapping input operands. */
3877 case ISD::SETLE
: CC
= ISD::SETGE
; Swap
= true; break;
3878 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
3879 case ISD::SETOLE
: CC
= ISD::SETOGE
; Swap
= true; break;
3880 case ISD::SETOLT
: CC
= ISD::SETOGT
; Swap
= true; break;
3881 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
3882 case ISD::SETUGT
: CC
= ISD::SETULT
; Swap
= true; break;
3885 /* Handle some cases by negating the result. */
3887 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
3888 case ISD::SETUNE
: CC
= ISD::SETOEQ
; Negate
= true; break;
3889 case ISD::SETULE
: CC
= ISD::SETOGT
; Negate
= true; break;
3890 case ISD::SETULT
: CC
= ISD::SETOGE
; Negate
= true; break;
3893 /* We have instructions implementing the remaining cases. */
// Equality compares.
3897 if (VecVT
== MVT::v4f32
)
3898 return HasVSX
? PPC::XVCMPEQSP
: PPC::VCMPEQFP
;
3899 else if (VecVT
== MVT::v2f64
)
3900 return PPC::XVCMPEQDP
;
// Greater-than compares.
3904 if (VecVT
== MVT::v4f32
)
3905 return HasVSX
? PPC::XVCMPGTSP
: PPC::VCMPGTFP
;
3906 else if (VecVT
== MVT::v2f64
)
3907 return PPC::XVCMPGTDP
;
// Greater-or-equal compares.
3911 if (VecVT
== MVT::v4f32
)
3912 return HasVSX
? PPC::XVCMPGESP
: PPC::VCMPGEFP
;
3913 else if (VecVT
== MVT::v2f64
)
3914 return PPC::XVCMPGEDP
;
3919 llvm_unreachable("Invalid floating-point vector compare condition");
// Integer element types.
3921 /* Handle some cases by swapping input operands. */
3923 case ISD::SETGE
: CC
= ISD::SETLE
; Swap
= true; break;
3924 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
3925 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
3926 case ISD::SETULT
: CC
= ISD::SETUGT
; Swap
= true; break;
3929 /* Handle some cases by negating the result. */
3931 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
3932 case ISD::SETUNE
: CC
= ISD::SETUEQ
; Negate
= true; break;
3933 case ISD::SETLE
: CC
= ISD::SETGT
; Negate
= true; break;
3934 case ISD::SETULE
: CC
= ISD::SETUGT
; Negate
= true; break;
3937 /* We have instructions implementing the remaining cases. */
// Equality compares, one opcode per element width.
3941 if (VecVT
== MVT::v16i8
)
3942 return PPC::VCMPEQUB
;
3943 else if (VecVT
== MVT::v8i16
)
3944 return PPC::VCMPEQUH
;
3945 else if (VecVT
== MVT::v4i32
)
3946 return PPC::VCMPEQUW
;
3947 else if (VecVT
== MVT::v2i64
)
3948 return PPC::VCMPEQUD
;
// Signed greater-than compares.
3951 if (VecVT
== MVT::v16i8
)
3952 return PPC::VCMPGTSB
;
3953 else if (VecVT
== MVT::v8i16
)
3954 return PPC::VCMPGTSH
;
3955 else if (VecVT
== MVT::v4i32
)
3956 return PPC::VCMPGTSW
;
3957 else if (VecVT
== MVT::v2i64
)
3958 return PPC::VCMPGTSD
;
// Unsigned greater-than compares.
3961 if (VecVT
== MVT::v16i8
)
3962 return PPC::VCMPGTUB
;
3963 else if (VecVT
== MVT::v8i16
)
3964 return PPC::VCMPGTUH
;
3965 else if (VecVT
== MVT::v4i32
)
3966 return PPC::VCMPGTUW
;
3967 else if (VecVT
== MVT::v2i64
)
3968 return PPC::VCMPGTUD
;
3973 llvm_unreachable("Invalid integer vector compare condition");
/// trySETCC - Hand-select a SETCC node \p N. Three strategies appear below:
///  - when CR bits are not in use and operand 1 is a 32-bit immediate, the
///    compare is turned into a short GPR-only sequence built from CNTLZW,
///    ADDIC, SUBFE, ADDZE, NEG, ANDC, NOR, AND, RLWINM and XORI (the
///    `Imm == ~0U` branch handles compares against -1);
///  - vector compares are selected to the opcode chosen by getVCmpInst, with
///    the operands swapped and/or the result complemented through XXLNOR or
///    VNOR as that helper requests;
///  - otherwise the compare is emitted through SelectCC, the result is forced
///    into CR7, read back with MFOCRF, and the wanted bit is extracted with
///    RLWINM (followed by an XORI with 1 on the path that flips the bit).
/// NOTE(review): the bool result presumably reports whether N was selected
/// here - confirm against the callers.
3977 bool PPCDAGToDAGISel::trySETCC(SDNode
*N
) {
3980 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
3982 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
3983 bool isPPC64
= (PtrVT
== MVT::i64
);
3985 if (!PPCSubTarget
->useCRBits() &&
3986 isInt32Immediate(N
->getOperand(1), Imm
)) {
3987 // We can codegen setcc op, imm very efficiently compared to a brcond.
3988 // Check for those cases here.
3991 SDValue Op
= N
->getOperand(0);
// CNTLZW followed by RLWINM with rotate 27 and mask 5..31.
3995 Op
= SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Op
), 0);
3996 SDValue Ops
[] = { Op
, getI32Imm(27, dl
), getI32Imm(5, dl
),
3997 getI32Imm(31, dl
) };
3998 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
// ADDIC with -1 produces a glue result that is consumed by SUBFE.
4004 SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4005 Op
, getI32Imm(~0U, dl
)), 0);
4006 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, AD
, Op
, AD
.getValue(1));
// RLWINM with rotate 1 and mask 31..31 applied directly to the operand.
4010 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4011 getI32Imm(31, dl
) };
4012 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
// NEG, then ANDC with the operand, then the same RLWINM 1, 31, 31.
4017 SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Op
), 0);
4018 T
= SDValue(CurDAG
->getMachineNode(PPC::ANDC
, dl
, MVT::i32
, T
, Op
), 0);
4019 SDValue Ops
[] = { T
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4020 getI32Imm(31, dl
) };
4021 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4025 } else if (Imm
== ~0U) { // setcc op, -1
4026 SDValue Op
= N
->getOperand(0);
// ADDIC with 1, whose glue result feeds ADDZE of an LI-materialized value.
4031 Op
= SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4032 Op
, getI32Imm(1, dl
)), 0);
4033 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
,
4034 SDValue(CurDAG
->getMachineNode(PPC::LI
, dl
,
4037 0), Op
.getValue(1));
// NOR of the operand with itself, then ADDIC -1 / SUBFE.
4041 Op
= SDValue(CurDAG
->getMachineNode(PPC::NOR
, dl
, MVT::i32
, Op
, Op
), 0);
4042 SDNode
*AD
= CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4043 Op
, getI32Imm(~0U, dl
));
4044 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(AD
, 0), Op
,
// ADDI 1, AND, then RLWINM 1, 31, 31.
4049 SDValue AD
= SDValue(CurDAG
->getMachineNode(PPC::ADDI
, dl
, MVT::i32
, Op
,
4050 getI32Imm(1, dl
)), 0);
4051 SDValue AN
= SDValue(CurDAG
->getMachineNode(PPC::AND
, dl
, MVT::i32
, AD
,
4053 SDValue Ops
[] = { AN
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4054 getI32Imm(31, dl
) };
4055 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
// RLWINM 1, 31, 31 followed by XORI with 1.
4059 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4060 getI32Imm(31, dl
) };
4061 Op
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
4062 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Op
, getI32Imm(1, dl
));
4069 SDValue LHS
= N
->getOperand(0);
4070 SDValue RHS
= N
->getOperand(1);
4072 // Altivec Vector compare instructions do not set any CR register by default and
4073 // vector compare operations return the same type as the operands.
4074 if (LHS
.getValueType().isVector()) {
4075 if (PPCSubTarget
->hasQPX() || PPCSubTarget
->hasSPE())
4078 EVT VecVT
= LHS
.getValueType();
// getVCmpInst reports through Swap / Negate how the compare must be adjusted.
4080 unsigned int VCmpInst
= getVCmpInst(VecVT
.getSimpleVT(), CC
,
4081 PPCSubTarget
->hasVSX(), Swap
, Negate
);
4083 std::swap(LHS
, RHS
);
// The result type is the operand vector type with integer elements.
4085 EVT ResVT
= VecVT
.changeVectorElementTypeToInteger();
// Negated form: emit the compare, then complement it with XXLNOR / VNOR.
4087 SDValue
VCmp(CurDAG
->getMachineNode(VCmpInst
, dl
, ResVT
, LHS
, RHS
), 0);
4088 CurDAG
->SelectNodeTo(N
, PPCSubTarget
->hasVSX() ? PPC::XXLNOR
: PPC::VNOR
,
// Plain form: the compare instruction itself is the result.
4093 CurDAG
->SelectNodeTo(N
, VCmpInst
, ResVT
, LHS
, RHS
);
4097 if (PPCSubTarget
->useCRBits())
4101 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
4102 SDValue CCReg
= SelectCC(LHS
, RHS
, CC
, dl
);
4105 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4106 // The correct compare instruction is already set by SelectCC()
4107 if (PPCSubTarget
->hasSPE() && LHS
.getValueType().isFloatingPoint()) {
4111 // Force the ccreg into CR7.
4112 SDValue CR7Reg
= CurDAG
->getRegister(PPC::CR7
, MVT::i32
);
4114 SDValue
InFlag(nullptr, 0); // Null incoming flag value.
4115 CCReg
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, CR7Reg
, CCReg
,
4116 InFlag
).getValue(1);
4118 IntCR
= SDValue(CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
, CR7Reg
,
// Rotate the selected CR7 bit (Idx) into the low bit and mask everything else.
4121 SDValue Ops
[] = { IntCR
, getI32Imm((32 - (3 - Idx
)) & 31, dl
),
4122 getI32Imm(31, dl
), getI32Imm(31, dl
) };
4124 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4128 // Get the specified bit.
4130 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
// Flip the extracted bit with XORI 1.
4131 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Tmp
, getI32Imm(1, dl
));
4135 /// Does this node represent a load/store node whose address can be represented
4136 /// with a register plus an immediate that's a multiple of \p Val?
/// The address is taken from operand 1 of a load or operand 2 of a store.
/// For frame-index based addresses the alignment of the stack object must
/// also be a multiple of \p Val.
4137 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const {
4138 LoadSDNode
*LDN
= dyn_cast
<LoadSDNode
>(N
);
4139 StoreSDNode
*STN
= dyn_cast
<StoreSDNode
>(N
);
// Pick the address operand according to the kind of memory node.
4142 AddrOp
= LDN
->getOperand(1);
4144 AddrOp
= STN
->getOperand(2);
4146 // If the address points to a frame object or a frame object with an offset,
4147 // we need to check the object alignment.
4149 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(
4150 AddrOp
.getOpcode() == ISD::ADD
? AddrOp
.getOperand(0) :
4152 // If op0 is a frame index that is under-aligned, we can't do it either,
4153 // because it is translated to r31 or r1 + slot + offset. We won't know the
4154 // slot number until the stack frame is finalized.
4155 const MachineFrameInfo
&MFI
= CurDAG
->getMachineFunction().getFrameInfo();
4156 unsigned SlotAlign
= MFI
.getObjectAlignment(FI
->getIndex());
4157 if ((SlotAlign
% Val
) != 0)
4160 // If we have an offset, we need a further check on the offset.
4161 if (AddrOp
.getOpcode() != ISD::ADD
)
// For base + offset addresses the offset must be a signed 16-bit immediate
// that is a multiple of Val.
4165 if (AddrOp
.getOpcode() == ISD::ADD
)
4166 return isIntS16Immediate(AddrOp
.getOperand(1), Imm
) && !(Imm
% Val
);
4168 // If the address comes from the outside, the offset will be zero.
4169 return AddrOp
.getOpcode() == ISD::CopyFromReg
;
/// Attach the memory operand of \p N to the machine node \p Result.
/// \p N is cast to MemSDNode and \p Result to MachineSDNode, so callers must
/// pass nodes of those kinds.
4172 void PPCDAGToDAGISel::transferMemOperands(SDNode
*N
, SDNode
*Result
) {
4173 // Transfer memoperands.
4174 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
// Result ends up with exactly this one memory reference.
4175 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
/// mayUseP9Setb - Check whether the SELECT_CC node \p N has one of the
/// three-way-compare shapes listed in the comments below, i.e. whether it can
/// be lowered to a SETB (see the debug message at the end).
/// \p CC is the condition code of the outer SELECT_CC.
/// \p NeedSwapOps is assigned according to the matched pattern; the caller
/// swaps the compare operands when it is true.
/// \p IsUnCmp is consulted while matching the inner condition code; the
/// caller uses it to choose SETUGT over SETGT for the generated compare.
/// NOTE(review): IsUnCmp presumably records that the inner compare was
/// unsigned (see "Canonicalize unsigned case") - confirm.
4178 static bool mayUseP9Setb(SDNode
*N
, const ISD::CondCode
&CC
, SelectionDAG
*DAG
,
4179 bool &NeedSwapOps
, bool &IsUnCmp
) {
4181 assert(N
->getOpcode() == ISD::SELECT_CC
&& "Expecting a SELECT_CC here.");
4183 SDValue LHS
= N
->getOperand(0);
4184 SDValue RHS
= N
->getOperand(1);
4185 SDValue TrueRes
= N
->getOperand(2);
4186 SDValue FalseRes
= N
->getOperand(3);
4187 ConstantSDNode
*TrueConst
= dyn_cast
<ConstantSDNode
>(TrueRes
);
4191 assert((N
->getSimpleValueType(0) == MVT::i64
||
4192 N
->getSimpleValueType(0) == MVT::i32
) &&
4193 "Expecting either i64 or i32 here.");
4195 // We are looking for any of:
4196 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4197 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4198 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4199 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4200 int64_t TrueResVal
= TrueConst
->getSExtValue();
// The true value must be -1, 0 or 1, and the false operand must have the
// opcode that goes with it.
4201 if ((TrueResVal
< -1 || TrueResVal
> 1) ||
4202 (TrueResVal
== -1 && FalseRes
.getOpcode() != ISD::ZERO_EXTEND
) ||
4203 (TrueResVal
== 1 && FalseRes
.getOpcode() != ISD::SIGN_EXTEND
) ||
4205 (FalseRes
.getOpcode() != ISD::SELECT_CC
|| CC
!= ISD::SETEQ
)))
// For the zext/sext forms, look through the extension to reach the inner
// compare.
4208 bool InnerIsSel
= FalseRes
.getOpcode() == ISD::SELECT_CC
;
4209 SDValue SetOrSelCC
= InnerIsSel
? FalseRes
: FalseRes
.getOperand(0);
4210 if (SetOrSelCC
.getOpcode() != ISD::SETCC
&&
4211 SetOrSelCC
.getOpcode() != ISD::SELECT_CC
)
4214 // Without this setb optimization, the outer SELECT_CC will be manually
4215 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4216 // transforms the pseudo instruction to an isel instruction. When there is more
4217 // than one use of the result (like zext/sext), with the current optimization
4218 // isel is merely replaced by setb without any significant gain. Since
4219 // setb has longer latency than the original isel, we should avoid this. Another
4220 // point is that setb requires the comparison to always be kept, which can break
4221 // the opportunity to eliminate the comparison if we gain that ability in future.
4222 if (!SetOrSelCC
.hasOneUse() || (!InnerIsSel
&& !FalseRes
.hasOneUse()))
4225 SDValue InnerLHS
= SetOrSelCC
.getOperand(0);
4226 SDValue InnerRHS
= SetOrSelCC
.getOperand(1);
// The condition code is operand 4 of a SELECT_CC and operand 2 of a SETCC.
4227 ISD::CondCode InnerCC
=
4228 cast
<CondCodeSDNode
>(SetOrSelCC
.getOperand(InnerIsSel
? 4 : 2))->get();
4229 // If the inner comparison is a select_cc, make sure the true/false values are
4230 // 1/-1 and canonicalize it if needed.
4232 ConstantSDNode
*SelCCTrueConst
=
4233 dyn_cast
<ConstantSDNode
>(SetOrSelCC
.getOperand(2));
4234 ConstantSDNode
*SelCCFalseConst
=
4235 dyn_cast
<ConstantSDNode
>(SetOrSelCC
.getOperand(3));
4236 if (!SelCCTrueConst
|| !SelCCFalseConst
)
4238 int64_t SelCCTVal
= SelCCTrueConst
->getSExtValue();
4239 int64_t SelCCFVal
= SelCCFalseConst
->getSExtValue();
4240 // The values must be -1/1 (requiring a swap) or 1/-1.
4241 if (SelCCTVal
== -1 && SelCCFVal
== 1) {
4242 std::swap(InnerLHS
, InnerRHS
);
4243 } else if (SelCCTVal
!= 1 || SelCCFVal
!= -1)
4247 // Canonicalize unsigned case
4248 if (InnerCC
== ISD::SETULT
|| InnerCC
== ISD::SETUGT
) {
4250 InnerCC
= (InnerCC
== ISD::SETULT
) ? ISD::SETLT
: ISD::SETGT
;
// The inner compare must use the same two operands as the outer one, either
// in the same order or exchanged.
4253 bool InnerSwapped
= false;
4254 if (LHS
== InnerRHS
&& RHS
== InnerLHS
)
4255 InnerSwapped
= true;
4256 else if (LHS
!= InnerLHS
|| RHS
!= InnerRHS
)
4260 // (select_cc lhs, rhs, 0, \
4261 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4265 if (InnerCC
!= ISD::SETLT
&& InnerCC
!= ISD::SETGT
)
4267 NeedSwapOps
= (InnerCC
== ISD::SETGT
) ? InnerSwapped
: !InnerSwapped
;
4270 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4271 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4272 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4273 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4274 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4275 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4277 if (!IsUnCmp
&& InnerCC
!= ISD::SETNE
)
4282 if (InnerCC
== ISD::SETNE
|| (InnerCC
== ISD::SETGT
&& !InnerSwapped
) ||
4283 (InnerCC
== ISD::SETLT
&& InnerSwapped
))
4284 NeedSwapOps
= (TrueResVal
== 1);
4289 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4290 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4291 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4292 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4293 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4294 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4296 if (!IsUnCmp
&& InnerCC
!= ISD::SETNE
)
4301 if (InnerCC
== ISD::SETNE
|| (InnerCC
== ISD::SETLT
&& !InnerSwapped
) ||
4302 (InnerCC
== ISD::SETGT
&& InnerSwapped
))
4303 NeedSwapOps
= (TrueResVal
== -1);
4312 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4313 LLVM_DEBUG(N
->dump());
4318 // Select - Convert the specified operand from a target-independent to a
4319 // target-specific node if it hasn't already been changed.
4320 void PPCDAGToDAGISel::Select(SDNode
*N
) {
4322 if (N
->isMachineOpcode()) {
4324 return; // Already selected.
4327 // In case any misguided DAG-level optimizations form an ADD with a
4328 // TargetConstant operand, crash here instead of miscompiling (by selecting
4329 // an r+r add instead of some kind of r+i add).
4330 if (N
->getOpcode() == ISD::ADD
&&
4331 N
->getOperand(1).getOpcode() == ISD::TargetConstant
)
4332 llvm_unreachable("Invalid ADD with TargetConstant operand");
4334 // Try matching complex bit permutations before doing anything else.
4335 if (tryBitPermutation(N
))
4338 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4339 if (tryIntCompareInGPR(N
))
4342 switch (N
->getOpcode()) {
4346 if (N
->getValueType(0) == MVT::i64
) {
4347 ReplaceNode(N
, selectI64Imm(CurDAG
, N
));
4357 case PPCISD::CALL
: {
4358 const Module
*M
= MF
->getFunction().getParent();
4360 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) != MVT::i32
||
4361 !PPCSubTarget
->isSecurePlt() || !PPCSubTarget
->isTargetELF() ||
4362 M
->getPICLevel() == PICLevel::SmallPIC
)
4365 SDValue Op
= N
->getOperand(1);
4367 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Op
)) {
4368 if (GA
->getTargetFlags() == PPCII::MO_PLT
)
4371 else if (ExternalSymbolSDNode
*ES
= dyn_cast
<ExternalSymbolSDNode
>(Op
)) {
4372 if (ES
->getTargetFlags() == PPCII::MO_PLT
)
4378 case PPCISD::GlobalBaseReg
:
4379 ReplaceNode(N
, getGlobalBaseReg());
4382 case ISD::FrameIndex
:
4383 selectFrameIndex(N
, N
);
4386 case PPCISD::MFOCRF
: {
4387 SDValue InFlag
= N
->getOperand(1);
4388 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
,
4389 N
->getOperand(0), InFlag
));
4393 case PPCISD::READ_TIME_BASE
:
4394 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ReadTB
, dl
, MVT::i32
, MVT::i32
,
4395 MVT::Other
, N
->getOperand(0)));
4398 case PPCISD::SRA_ADDZE
: {
4399 SDValue N0
= N
->getOperand(0);
4401 CurDAG
->getTargetConstant(*cast
<ConstantSDNode
>(N
->getOperand(1))->
4402 getConstantIntValue(), dl
,
4403 N
->getValueType(0));
4404 if (N
->getValueType(0) == MVT::i64
) {
4406 CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, MVT::Glue
,
4408 CurDAG
->SelectNodeTo(N
, PPC::ADDZE8
, MVT::i64
, SDValue(Op
, 0),
4412 assert(N
->getValueType(0) == MVT::i32
&&
4413 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4415 CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, MVT::Glue
,
4417 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
, SDValue(Op
, 0),
4424 // Change TLS initial-exec D-form stores to X-form stores.
4425 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
4426 if (EnableTLSOpt
&& PPCSubTarget
->isELFv2ABI() &&
4427 ST
->getAddressingMode() != ISD::PRE_INC
)
4428 if (tryTLSXFormStore(ST
))
4433 // Handle preincrement loads.
4434 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
4435 EVT LoadedVT
= LD
->getMemoryVT();
4437 // Normal loads are handled by code generated from the .td file.
4438 if (LD
->getAddressingMode() != ISD::PRE_INC
) {
4439 // Change TLS initial-exec D-form loads to X-form loads.
4440 if (EnableTLSOpt
&& PPCSubTarget
->isELFv2ABI())
4441 if (tryTLSXFormLoad(LD
))
4446 SDValue Offset
= LD
->getOffset();
4447 if (Offset
.getOpcode() == ISD::TargetConstant
||
4448 Offset
.getOpcode() == ISD::TargetGlobalAddress
) {
4451 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
4452 if (LD
->getValueType(0) != MVT::i64
) {
4453 // Handle PPC32 integer and normal FP loads.
4454 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4455 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4456 default: llvm_unreachable("Invalid PPC load type!");
4457 case MVT::f64
: Opcode
= PPC::LFDU
; break;
4458 case MVT::f32
: Opcode
= PPC::LFSU
; break;
4459 case MVT::i32
: Opcode
= PPC::LWZU
; break;
4460 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU
: PPC::LHZU
; break;
4462 case MVT::i8
: Opcode
= PPC::LBZU
; break;
4465 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
4466 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4467 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4468 default: llvm_unreachable("Invalid PPC load type!");
4469 case MVT::i64
: Opcode
= PPC::LDU
; break;
4470 case MVT::i32
: Opcode
= PPC::LWZU8
; break;
4471 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU8
: PPC::LHZU8
; break;
4473 case MVT::i8
: Opcode
= PPC::LBZU8
; break;
4477 SDValue Chain
= LD
->getChain();
4478 SDValue Base
= LD
->getBasePtr();
4479 SDValue Ops
[] = { Offset
, Base
, Chain
};
4480 SDNode
*MN
= CurDAG
->getMachineNode(
4481 Opcode
, dl
, LD
->getValueType(0),
4482 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
4483 transferMemOperands(N
, MN
);
4488 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
4489 if (LD
->getValueType(0) != MVT::i64
) {
4490 // Handle PPC32 integer and normal FP loads.
4491 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4492 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4493 default: llvm_unreachable("Invalid PPC load type!");
4494 case MVT::v4f64
: Opcode
= PPC::QVLFDUX
; break; // QPX
4495 case MVT::v4f32
: Opcode
= PPC::QVLFSUX
; break; // QPX
4496 case MVT::f64
: Opcode
= PPC::LFDUX
; break;
4497 case MVT::f32
: Opcode
= PPC::LFSUX
; break;
4498 case MVT::i32
: Opcode
= PPC::LWZUX
; break;
4499 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX
: PPC::LHZUX
; break;
4501 case MVT::i8
: Opcode
= PPC::LBZUX
; break;
4504 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
4505 assert((!isSExt
|| LoadedVT
== MVT::i16
|| LoadedVT
== MVT::i32
) &&
4506 "Invalid sext update load");
4507 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4508 default: llvm_unreachable("Invalid PPC load type!");
4509 case MVT::i64
: Opcode
= PPC::LDUX
; break;
4510 case MVT::i32
: Opcode
= isSExt
? PPC::LWAUX
: PPC::LWZUX8
; break;
4511 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX8
: PPC::LHZUX8
; break;
4513 case MVT::i8
: Opcode
= PPC::LBZUX8
; break;
4517 SDValue Chain
= LD
->getChain();
4518 SDValue Base
= LD
->getBasePtr();
4519 SDValue Ops
[] = { Base
, Offset
, Chain
};
4520 SDNode
*MN
= CurDAG
->getMachineNode(
4521 Opcode
, dl
, LD
->getValueType(0),
4522 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
4523 transferMemOperands(N
, MN
);
4530 unsigned Imm
, Imm2
, SH
, MB
, ME
;
4533 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4534 // with a mask, emit rlwinm
4535 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4536 isRotateAndMask(N
->getOperand(0).getNode(), Imm
, false, SH
, MB
, ME
)) {
4537 SDValue Val
= N
->getOperand(0).getOperand(0);
4538 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4539 getI32Imm(ME
, dl
) };
4540 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4543 // If this is just a masked value where the input is not handled above, and
4544 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4545 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4546 isRunOfOnes(Imm
, MB
, ME
) &&
4547 N
->getOperand(0).getOpcode() != ISD::ROTL
) {
4548 SDValue Val
= N
->getOperand(0);
4549 SDValue Ops
[] = { Val
, getI32Imm(0, dl
), getI32Imm(MB
, dl
),
4550 getI32Imm(ME
, dl
) };
4551 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4554 // If this is a 64-bit zero-extension mask, emit rldicl.
4555 if (isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) &&
4557 SDValue Val
= N
->getOperand(0);
4558 MB
= 64 - countTrailingOnes(Imm64
);
4561 if (Val
.getOpcode() == ISD::ANY_EXTEND
) {
4562 auto Op0
= Val
.getOperand(0);
4563 if ( Op0
.getOpcode() == ISD::SRL
&&
4564 isInt32Immediate(Op0
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
4566 auto ResultType
= Val
.getNode()->getValueType(0);
4567 auto ImDef
= CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
,
4569 SDValue
IDVal (ImDef
, 0);
4571 Val
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
,
4572 ResultType
, IDVal
, Op0
.getOperand(0),
4573 getI32Imm(1, dl
)), 0);
4578 // If the operand is a logical right shift, we can fold it into this
4579 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4580 // for n <= mb. The right shift is really a left rotate followed by a
4581 // mask, and this mask is a more-restrictive sub-mask of the mask implied
4583 if (Val
.getOpcode() == ISD::SRL
&&
4584 isInt32Immediate(Val
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
4585 assert(Imm
< 64 && "Illegal shift amount");
4586 Val
= Val
.getOperand(0);
4590 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
) };
4591 CurDAG
->SelectNodeTo(N
, PPC::RLDICL
, MVT::i64
, Ops
);
4594 // If this is a negated 64-bit zero-extension mask,
4595 // i.e. the immediate is a sequence of ones from most significant side
4596 // and all zeros for the remainder, we should use rldicr.
4597 if (isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) &&
4598 isMask_64(~Imm64
)) {
4599 SDValue Val
= N
->getOperand(0);
4600 MB
= 63 - countTrailingOnes(~Imm64
);
4602 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
) };
4603 CurDAG
->SelectNodeTo(N
, PPC::RLDICR
, MVT::i64
, Ops
);
4607 // AND X, 0 -> 0, not "rlwinm 32".
4608 if (isInt32Immediate(N
->getOperand(1), Imm
) && (Imm
== 0)) {
4609 ReplaceUses(SDValue(N
, 0), N
->getOperand(1));
4612 // ISD::OR doesn't get all the bitfield insertion fun.
4613 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4615 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4616 N
->getOperand(0).getOpcode() == ISD::OR
&&
4617 isInt32Immediate(N
->getOperand(0).getOperand(1), Imm2
)) {
4618 // The idea here is to check whether this is equivalent to:
4619 // (c1 & m) | (x & ~m)
4620 // where m is a run-of-ones mask. The logic here is that, for each bit in
4622 // - if both are 1, then the output will be 1.
4623 // - if both are 0, then the output will be 0.
4624 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4626 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4628 // If that last condition is never the case, then we can form m from the
4629 // bits that are the same between c1 and c2.
4631 if (isRunOfOnes(~(Imm
^Imm2
), MB
, ME
) && !(~Imm
& Imm2
)) {
4632 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4633 N
->getOperand(0).getOperand(1),
4634 getI32Imm(0, dl
), getI32Imm(MB
, dl
),
4635 getI32Imm(ME
, dl
) };
4636 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
4641 // Other cases are autogenerated.
4645 if (N
->getValueType(0) == MVT::i32
)
4646 if (tryBitfieldInsert(N
))
4650 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
4651 isIntS16Immediate(N
->getOperand(1), Imm
)) {
4652 KnownBits LHSKnown
= CurDAG
->computeKnownBits(N
->getOperand(0));
4654 // If this is equivalent to an add, then we can fold it with the
4655 // FrameIndex calculation.
4656 if ((LHSKnown
.Zero
.getZExtValue()|~(uint64_t)Imm
) == ~0ULL) {
4657 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
4662 // OR with a 32-bit immediate can be handled by ori + oris
4663 // without creating an immediate in a GPR.
4665 bool IsPPC64
= PPCSubTarget
->isPPC64();
4666 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
4667 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
4668 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
4669 uint64_t ImmHi
= Imm64
>> 16;
4670 uint64_t ImmLo
= Imm64
& 0xFFFF;
4671 if (ImmHi
!= 0 && ImmLo
!= 0) {
4672 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
4674 getI16Imm(ImmLo
, dl
));
4675 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
4676 CurDAG
->SelectNodeTo(N
, PPC::ORIS8
, MVT::i64
, Ops1
);
4681 // Other cases are autogenerated.
4685 // XOR with a 32-bit immediate can be handled by xori + xoris
4686 // without creating an immediate in a GPR.
4688 bool IsPPC64
= PPCSubTarget
->isPPC64();
4689 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
4690 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
4691 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
4692 uint64_t ImmHi
= Imm64
>> 16;
4693 uint64_t ImmLo
= Imm64
& 0xFFFF;
4694 if (ImmHi
!= 0 && ImmLo
!= 0) {
4695 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
4697 getI16Imm(ImmLo
, dl
));
4698 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
4699 CurDAG
->SelectNodeTo(N
, PPC::XORIS8
, MVT::i64
, Ops1
);
4708 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
4709 isIntS16Immediate(N
->getOperand(1), Imm
)) {
4710 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
4717 unsigned Imm
, SH
, MB
, ME
;
4718 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
4719 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
4720 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4721 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4722 getI32Imm(ME
, dl
) };
4723 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4727 // Other cases are autogenerated.
4731 unsigned Imm
, SH
, MB
, ME
;
4732 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
4733 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
4734 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4735 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4736 getI32Imm(ME
, dl
) };
4737 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4741 // Other cases are autogenerated.
4744 // FIXME: Remove this once the ANDI glue bug is fixed:
4745 case PPCISD::ANDIo_1_EQ_BIT
:
4746 case PPCISD::ANDIo_1_GT_BIT
: {
4750 EVT InVT
= N
->getOperand(0).getValueType();
4751 assert((InVT
== MVT::i64
|| InVT
== MVT::i32
) &&
4752 "Invalid input type for ANDIo_1_EQ_BIT");
4754 unsigned Opcode
= (InVT
== MVT::i64
) ? PPC::ANDIo8
: PPC::ANDIo
;
4755 SDValue
AndI(CurDAG
->getMachineNode(Opcode
, dl
, InVT
, MVT::Glue
,
4757 CurDAG
->getTargetConstant(1, dl
, InVT
)),
4759 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
4761 CurDAG
->getTargetConstant(N
->getOpcode() == PPCISD::ANDIo_1_EQ_BIT
?
4762 PPC::sub_eq
: PPC::sub_gt
, dl
, MVT::i32
);
4764 CurDAG
->SelectNodeTo(N
, TargetOpcode::EXTRACT_SUBREG
, MVT::i1
, CR0Reg
,
4765 SRIdxVal
, SDValue(AndI
.getNode(), 1) /* glue */);
4768 case ISD::SELECT_CC
: {
4769 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(4))->get();
4771 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
4772 bool isPPC64
= (PtrVT
== MVT::i64
);
4774 // If this is a select of i1 operands, we'll pattern match it.
4775 if (PPCSubTarget
->useCRBits() &&
4776 N
->getOperand(0).getValueType() == MVT::i1
)
4779 if (PPCSubTarget
->isISA3_0() && PPCSubTarget
->isPPC64()) {
4780 bool NeedSwapOps
= false;
4781 bool IsUnCmp
= false;
4782 if (mayUseP9Setb(N
, CC
, CurDAG
, NeedSwapOps
, IsUnCmp
)) {
4783 SDValue LHS
= N
->getOperand(0);
4784 SDValue RHS
= N
->getOperand(1);
4786 std::swap(LHS
, RHS
);
4788 // Use SelectCC to generate the comparison that sets the CR bits. For
4789 // equality comparisons with one literal operand, SelectCC may avoid
4790 // materializing the whole literal and check it with xoris first, in
4791 // which case the resulting comparison no longer represents the exact
4792 // GT/LT relationship. To avoid this we specify SETGT/SETUGT here
4793 // instead of SETEQ.
4795 SelectCC(LHS
, RHS
, IsUnCmp
? ISD::SETUGT
: ISD::SETGT
, dl
);
4796 CurDAG
->SelectNodeTo(
4797 N
, N
->getSimpleValueType(0) == MVT::i64
? PPC::SETB8
: PPC::SETB
,
4798 N
->getValueType(0), GenCC
);
4804 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
4806 if (ConstantSDNode
*N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1)))
4807 if (ConstantSDNode
*N2C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(2)))
4808 if (ConstantSDNode
*N3C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(3)))
4809 if (N1C
->isNullValue() && N3C
->isNullValue() &&
4810 N2C
->getZExtValue() == 1ULL && CC
== ISD::SETNE
&&
4811 // FIXME: Implement this optzn for PPC64.
4812 N
->getValueType(0) == MVT::i32
) {
4814 CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4815 N
->getOperand(0), getI32Imm(~0U, dl
));
4816 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(Tmp
, 0),
4817 N
->getOperand(0), SDValue(Tmp
, 1));
4821 SDValue CCReg
= SelectCC(N
->getOperand(0), N
->getOperand(1), CC
, dl
);
4823 if (N
->getValueType(0) == MVT::i1
) {
4824 // An i1 select is: (c & t) | (!c & f).
4826 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
4830 default: llvm_unreachable("Invalid CC index");
4831 case 0: SRI
= PPC::sub_lt
; break;
4832 case 1: SRI
= PPC::sub_gt
; break;
4833 case 2: SRI
= PPC::sub_eq
; break;
4834 case 3: SRI
= PPC::sub_un
; break;
4837 SDValue CCBit
= CurDAG
->getTargetExtractSubreg(SRI
, dl
, MVT::i1
, CCReg
);
4839 SDValue
NotCCBit(CurDAG
->getMachineNode(PPC::CRNOR
, dl
, MVT::i1
,
4841 SDValue C
= Inv
? NotCCBit
: CCBit
,
4842 NotC
= Inv
? CCBit
: NotCCBit
;
4844 SDValue
CAndT(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
4845 C
, N
->getOperand(2)), 0);
4846 SDValue
NotCAndF(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
4847 NotC
, N
->getOperand(3)), 0);
4849 CurDAG
->SelectNodeTo(N
, PPC::CROR
, MVT::i1
, CAndT
, NotCAndF
);
4853 unsigned BROpc
= getPredicateForSetCC(CC
);
4855 unsigned SelectCCOp
;
4856 if (N
->getValueType(0) == MVT::i32
)
4857 SelectCCOp
= PPC::SELECT_CC_I4
;
4858 else if (N
->getValueType(0) == MVT::i64
)
4859 SelectCCOp
= PPC::SELECT_CC_I8
;
4860 else if (N
->getValueType(0) == MVT::f32
) {
4861 if (PPCSubTarget
->hasP8Vector())
4862 SelectCCOp
= PPC::SELECT_CC_VSSRC
;
4863 else if (PPCSubTarget
->hasSPE())
4864 SelectCCOp
= PPC::SELECT_CC_SPE4
;
4866 SelectCCOp
= PPC::SELECT_CC_F4
;
4867 } else if (N
->getValueType(0) == MVT::f64
) {
4868 if (PPCSubTarget
->hasVSX())
4869 SelectCCOp
= PPC::SELECT_CC_VSFRC
;
4870 else if (PPCSubTarget
->hasSPE())
4871 SelectCCOp
= PPC::SELECT_CC_SPE
;
4873 SelectCCOp
= PPC::SELECT_CC_F8
;
4874 } else if (N
->getValueType(0) == MVT::f128
)
4875 SelectCCOp
= PPC::SELECT_CC_F16
;
4876 else if (PPCSubTarget
->hasSPE())
4877 SelectCCOp
= PPC::SELECT_CC_SPE
;
4878 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4f64
)
4879 SelectCCOp
= PPC::SELECT_CC_QFRC
;
4880 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4f32
)
4881 SelectCCOp
= PPC::SELECT_CC_QSRC
;
4882 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4i1
)
4883 SelectCCOp
= PPC::SELECT_CC_QBRC
;
4884 else if (N
->getValueType(0) == MVT::v2f64
||
4885 N
->getValueType(0) == MVT::v2i64
)
4886 SelectCCOp
= PPC::SELECT_CC_VSRC
;
4888 SelectCCOp
= PPC::SELECT_CC_VRRC
;
4890 SDValue Ops
[] = { CCReg
, N
->getOperand(2), N
->getOperand(3),
4891 getI32Imm(BROpc
, dl
) };
4892 CurDAG
->SelectNodeTo(N
, SelectCCOp
, N
->getValueType(0), Ops
);
4895 case ISD::VECTOR_SHUFFLE
:
4896 if (PPCSubTarget
->hasVSX() && (N
->getValueType(0) == MVT::v2f64
||
4897 N
->getValueType(0) == MVT::v2i64
)) {
4898 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(N
);
4900 SDValue Op1
= N
->getOperand(SVN
->getMaskElt(0) < 2 ? 0 : 1),
4901 Op2
= N
->getOperand(SVN
->getMaskElt(1) < 2 ? 0 : 1);
4904 for (int i
= 0; i
< 2; ++i
)
4905 if (SVN
->getMaskElt(i
) <= 0 || SVN
->getMaskElt(i
) == 2)
4910 if (Op1
== Op2
&& DM
[0] == 0 && DM
[1] == 0 &&
4911 Op1
.getOpcode() == ISD::SCALAR_TO_VECTOR
&&
4912 isa
<LoadSDNode
>(Op1
.getOperand(0))) {
4913 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op1
.getOperand(0));
4914 SDValue Base
, Offset
;
4916 if (LD
->isUnindexed() && LD
->hasOneUse() && Op1
.hasOneUse() &&
4917 (LD
->getMemoryVT() == MVT::f64
||
4918 LD
->getMemoryVT() == MVT::i64
) &&
4919 SelectAddrIdxOnly(LD
->getBasePtr(), Base
, Offset
)) {
4920 SDValue Chain
= LD
->getChain();
4921 SDValue Ops
[] = { Base
, Offset
, Chain
};
4922 MachineMemOperand
*MemOp
= LD
->getMemOperand();
4923 SDNode
*NewN
= CurDAG
->SelectNodeTo(N
, PPC::LXVDSX
,
4924 N
->getValueType(0), Ops
);
4925 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(NewN
), {MemOp
});
4930 // For little endian, we must swap the input operands and adjust
4931 // the mask elements (reverse and invert them).
4932 if (PPCSubTarget
->isLittleEndian()) {
4933 std::swap(Op1
, Op2
);
4934 unsigned tmp
= DM
[0];
4939 SDValue DMV
= CurDAG
->getTargetConstant(DM
[1] | (DM
[0] << 1), dl
,
4941 SDValue Ops
[] = { Op1
, Op2
, DMV
};
4942 CurDAG
->SelectNodeTo(N
, PPC::XXPERMDI
, N
->getValueType(0), Ops
);
4949 bool IsPPC64
= PPCSubTarget
->isPPC64();
4950 SDValue Ops
[] = { N
->getOperand(1), N
->getOperand(0) };
4951 CurDAG
->SelectNodeTo(N
, N
->getOpcode() == PPCISD::BDNZ
4952 ? (IsPPC64
? PPC::BDNZ8
: PPC::BDNZ
)
4953 : (IsPPC64
? PPC::BDZ8
: PPC::BDZ
),
4957 case PPCISD::COND_BRANCH
: {
4958 // Op #0 is the Chain.
4959 // Op #1 is the PPC::PRED_* number.
4961 // Op #3 is the Dest MBB
4962 // Op #4 is the Flag.
4963 // Prevent PPC::PRED_* from being selected into LI.
4964 unsigned PCC
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
4965 if (EnableBranchHint
)
4966 PCC
|= getBranchHint(PCC
, FuncInfo
, N
->getOperand(3));
4968 SDValue Pred
= getI32Imm(PCC
, dl
);
4969 SDValue Ops
[] = { Pred
, N
->getOperand(2), N
->getOperand(3),
4970 N
->getOperand(0), N
->getOperand(4) };
4971 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
4975 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(1))->get();
4976 unsigned PCC
= getPredicateForSetCC(CC
);
4978 if (N
->getOperand(2).getValueType() == MVT::i1
) {
4982 default: llvm_unreachable("Unexpected Boolean-operand predicate");
4983 case PPC::PRED_LT
: Opc
= PPC::CRANDC
; Swap
= true; break;
4984 case PPC::PRED_LE
: Opc
= PPC::CRORC
; Swap
= true; break;
4985 case PPC::PRED_EQ
: Opc
= PPC::CREQV
; Swap
= false; break;
4986 case PPC::PRED_GE
: Opc
= PPC::CRORC
; Swap
= false; break;
4987 case PPC::PRED_GT
: Opc
= PPC::CRANDC
; Swap
= false; break;
4988 case PPC::PRED_NE
: Opc
= PPC::CRXOR
; Swap
= false; break;
4991 // A signed comparison of i1 values produces the opposite result to an
4992 // unsigned one if the condition code includes less-than or greater-than.
4993 // This is because 1 is the most negative signed i1 number and the most
4994 // positive unsigned i1 number. The CR-logical operations used for such
4995 // comparisons are non-commutative so for signed comparisons vs. unsigned
4996 // ones, the input operands just need to be swapped.
4997 if (ISD::isSignedIntSetCC(CC
))
5000 SDValue
BitComp(CurDAG
->getMachineNode(Opc
, dl
, MVT::i1
,
5001 N
->getOperand(Swap
? 3 : 2),
5002 N
->getOperand(Swap
? 2 : 3)), 0);
5003 CurDAG
->SelectNodeTo(N
, PPC::BC
, MVT::Other
, BitComp
, N
->getOperand(4),
5008 if (EnableBranchHint
)
5009 PCC
|= getBranchHint(PCC
, FuncInfo
, N
->getOperand(4));
5011 SDValue CondCode
= SelectCC(N
->getOperand(2), N
->getOperand(3), CC
, dl
);
5012 SDValue Ops
[] = { getI32Imm(PCC
, dl
), CondCode
,
5013 N
->getOperand(4), N
->getOperand(0) };
5014 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
5018 // FIXME: Should custom lower this.
5019 SDValue Chain
= N
->getOperand(0);
5020 SDValue Target
= N
->getOperand(1);
5021 unsigned Opc
= Target
.getValueType() == MVT::i32
? PPC::MTCTR
: PPC::MTCTR8
;
5022 unsigned Reg
= Target
.getValueType() == MVT::i32
? PPC::BCTR
: PPC::BCTR8
;
5023 Chain
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Glue
, Target
,
5025 CurDAG
->SelectNodeTo(N
, Reg
, MVT::Other
, Chain
);
5028 case PPCISD::TOC_ENTRY
: {
5029 assert ((PPCSubTarget
->isPPC64() || PPCSubTarget
->isSVR4ABI()) &&
5030 "Only supported for 64-bit ABI and 32-bit SVR4");
5031 if (PPCSubTarget
->isSVR4ABI() && !PPCSubTarget
->isPPC64()) {
5032 SDValue GA
= N
->getOperand(0);
5033 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LWZtoc
, dl
, MVT::i32
, GA
,
5035 transferMemOperands(N
, MN
);
5040 // For medium and large code model, we generate two instructions as
5041 // described below. Otherwise we allow SelectCodeCommon to handle this,
5042 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
5043 CodeModel::Model CModel
= TM
.getCodeModel();
5044 if (CModel
!= CodeModel::Medium
&& CModel
!= CodeModel::Large
)
5047 // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
5048 // If it must be toc-referenced according to PPCSubTarget, we generate:
5049 // LDtocL(@sym, ADDIStocHA(%x2, @sym))
5050 // Otherwise we generate:
5051 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
5052 SDValue GA
= N
->getOperand(0);
5053 SDValue TOCbase
= N
->getOperand(1);
5054 SDNode
*Tmp
= CurDAG
->getMachineNode(PPC::ADDIStocHA
, dl
, MVT::i64
,
5056 if (PPCLowering
->isAccessedAsGotIndirect(GA
)) {
5057 // If it is access as got-indirect, we need an extra LD to load
5059 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LDtocL
, dl
, MVT::i64
, GA
,
5061 transferMemOperands(N
, MN
);
5066 // Build the address relative to the TOC-pointer..
5067 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ADDItocL
, dl
, MVT::i64
,
5068 SDValue(Tmp
, 0), GA
));
5071 case PPCISD::PPC32_PICGOT
:
5072 // Generate a PIC-safe GOT reference.
5073 assert(!PPCSubTarget
->isPPC64() && PPCSubTarget
->isSVR4ABI() &&
5074 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
5075 CurDAG
->SelectNodeTo(N
, PPC::PPC32PICGOT
,
5076 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()),
5080 case PPCISD::VADD_SPLAT
: {
5081 // This expands into one of three sequences, depending on whether
5082 // the first operand is odd or even, positive or negative.
5083 assert(isa
<ConstantSDNode
>(N
->getOperand(0)) &&
5084 isa
<ConstantSDNode
>(N
->getOperand(1)) &&
5085 "Invalid operand on VADD_SPLAT!");
5087 int Elt
= N
->getConstantOperandVal(0);
5088 int EltSize
= N
->getConstantOperandVal(1);
5089 unsigned Opc1
, Opc2
, Opc3
;
5093 Opc1
= PPC::VSPLTISB
;
5094 Opc2
= PPC::VADDUBM
;
5095 Opc3
= PPC::VSUBUBM
;
5097 } else if (EltSize
== 2) {
5098 Opc1
= PPC::VSPLTISH
;
5099 Opc2
= PPC::VADDUHM
;
5100 Opc3
= PPC::VSUBUHM
;
5103 assert(EltSize
== 4 && "Invalid element size on VADD_SPLAT!");
5104 Opc1
= PPC::VSPLTISW
;
5105 Opc2
= PPC::VADDUWM
;
5106 Opc3
= PPC::VSUBUWM
;
5110 if ((Elt
& 1) == 0) {
5111 // Elt is even, in the range [-32,-18] + [16,30].
5113 // Convert: VADD_SPLAT elt, size
5114 // Into: tmp = VSPLTIS[BHW] elt
5115 // VADDU[BHW]M tmp, tmp
5116 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
5117 SDValue EltVal
= getI32Imm(Elt
>> 1, dl
);
5118 SDNode
*Tmp
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5119 SDValue TmpVal
= SDValue(Tmp
, 0);
5120 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, TmpVal
, TmpVal
));
5122 } else if (Elt
> 0) {
5123 // Elt is odd and positive, in the range [17,31].
5125 // Convert: VADD_SPLAT elt, size
5126 // Into: tmp1 = VSPLTIS[BHW] elt-16
5127 // tmp2 = VSPLTIS[BHW] -16
5128 // VSUBU[BHW]M tmp1, tmp2
5129 SDValue EltVal
= getI32Imm(Elt
- 16, dl
);
5130 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5131 EltVal
= getI32Imm(-16, dl
);
5132 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5133 ReplaceNode(N
, CurDAG
->getMachineNode(Opc3
, dl
, VT
, SDValue(Tmp1
, 0),
5137 // Elt is odd and negative, in the range [-31,-17].
5139 // Convert: VADD_SPLAT elt, size
5140 // Into: tmp1 = VSPLTIS[BHW] elt+16
5141 // tmp2 = VSPLTIS[BHW] -16
5142 // VADDU[BHW]M tmp1, tmp2
5143 SDValue EltVal
= getI32Imm(Elt
+ 16, dl
);
5144 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5145 EltVal
= getI32Imm(-16, dl
);
5146 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5147 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, SDValue(Tmp1
, 0),
5157 // If the target supports the cmpb instruction, do the idiom recognition here.
5158 // We don't do this as a DAG combine because we don't want to do it as nodes
5159 // are being combined (because we might miss part of the eventual idiom). We
5160 // don't want to do it during instruction selection because we want to reuse
5161 // the logic for lowering the masking operations already part of the
5162 // instruction selector.
5163 SDValue
PPCDAGToDAGISel::combineToCMPB(SDNode
*N
) {
5166 assert(N
->getOpcode() == ISD::OR
&&
5167 "Only OR nodes are supported for CMPB");
5170 if (!PPCSubTarget
->hasCMPB())
5173 if (N
->getValueType(0) != MVT::i32
&&
5174 N
->getValueType(0) != MVT::i64
)
5177 EVT VT
= N
->getValueType(0);
5180 bool BytesFound
[8] = {false, false, false, false, false, false, false, false};
5181 uint64_t Mask
= 0, Alt
= 0;
5183 auto IsByteSelectCC
= [this](SDValue O
, unsigned &b
,
5184 uint64_t &Mask
, uint64_t &Alt
,
5185 SDValue
&LHS
, SDValue
&RHS
) {
5186 if (O
.getOpcode() != ISD::SELECT_CC
)
5188 ISD::CondCode CC
= cast
<CondCodeSDNode
>(O
.getOperand(4))->get();
5190 if (!isa
<ConstantSDNode
>(O
.getOperand(2)) ||
5191 !isa
<ConstantSDNode
>(O
.getOperand(3)))
5194 uint64_t PM
= O
.getConstantOperandVal(2);
5195 uint64_t PAlt
= O
.getConstantOperandVal(3);
5196 for (b
= 0; b
< 8; ++b
) {
5197 uint64_t Mask
= UINT64_C(0xFF) << (8*b
);
5198 if (PM
&& (PM
& Mask
) == PM
&& (PAlt
& Mask
) == PAlt
)
5207 if (!isa
<ConstantSDNode
>(O
.getOperand(1)) ||
5208 O
.getConstantOperandVal(1) != 0) {
5209 SDValue Op0
= O
.getOperand(0), Op1
= O
.getOperand(1);
5210 if (Op0
.getOpcode() == ISD::TRUNCATE
)
5211 Op0
= Op0
.getOperand(0);
5212 if (Op1
.getOpcode() == ISD::TRUNCATE
)
5213 Op1
= Op1
.getOperand(0);
5215 if (Op0
.getOpcode() == ISD::SRL
&& Op1
.getOpcode() == ISD::SRL
&&
5216 Op0
.getOperand(1) == Op1
.getOperand(1) && CC
== ISD::SETEQ
&&
5217 isa
<ConstantSDNode
>(Op0
.getOperand(1))) {
5219 unsigned Bits
= Op0
.getValueSizeInBits();
5222 if (Op0
.getConstantOperandVal(1) != Bits
-8)
5225 LHS
= Op0
.getOperand(0);
5226 RHS
= Op1
.getOperand(0);
5230 // When we have small integers (i16 to be specific), the form present
5231 // post-legalization uses SETULT in the SELECT_CC for the
5232 // higher-order byte, depending on the fact that the
5233 // even-higher-order bytes are known to all be zero, for example:
5234 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
5235 // (so when the second byte is the same, because all higher-order
5236 // bits from bytes 3 and 4 are known to be zero, the result of the
5237 // xor can be at most 255)
5238 if (Op0
.getOpcode() == ISD::XOR
&& CC
== ISD::SETULT
&&
5239 isa
<ConstantSDNode
>(O
.getOperand(1))) {
5241 uint64_t ULim
= O
.getConstantOperandVal(1);
5242 if (ULim
!= (UINT64_C(1) << b
*8))
5245 // Now we need to make sure that the upper bytes are known to be
5247 unsigned Bits
= Op0
.getValueSizeInBits();
5248 if (!CurDAG
->MaskedValueIsZero(
5249 Op0
, APInt::getHighBitsSet(Bits
, Bits
- (b
+ 1) * 8)))
5252 LHS
= Op0
.getOperand(0);
5253 RHS
= Op0
.getOperand(1);
5260 if (CC
!= ISD::SETEQ
)
5263 SDValue Op
= O
.getOperand(0);
5264 if (Op
.getOpcode() == ISD::AND
) {
5265 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
5267 if (Op
.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b
)))
5270 SDValue XOR
= Op
.getOperand(0);
5271 if (XOR
.getOpcode() == ISD::TRUNCATE
)
5272 XOR
= XOR
.getOperand(0);
5273 if (XOR
.getOpcode() != ISD::XOR
)
5276 LHS
= XOR
.getOperand(0);
5277 RHS
= XOR
.getOperand(1);
5279 } else if (Op
.getOpcode() == ISD::SRL
) {
5280 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
5282 unsigned Bits
= Op
.getValueSizeInBits();
5285 if (Op
.getConstantOperandVal(1) != Bits
-8)
5288 SDValue XOR
= Op
.getOperand(0);
5289 if (XOR
.getOpcode() == ISD::TRUNCATE
)
5290 XOR
= XOR
.getOperand(0);
5291 if (XOR
.getOpcode() != ISD::XOR
)
5294 LHS
= XOR
.getOperand(0);
5295 RHS
= XOR
.getOperand(1);
5302 SmallVector
<SDValue
, 8> Queue(1, SDValue(N
, 0));
5303 while (!Queue
.empty()) {
5304 SDValue V
= Queue
.pop_back_val();
5306 for (const SDValue
&O
: V
.getNode()->ops()) {
5308 uint64_t M
= 0, A
= 0;
5310 if (O
.getOpcode() == ISD::OR
) {
5312 } else if (IsByteSelectCC(O
, b
, M
, A
, OLHS
, ORHS
)) {
5316 BytesFound
[b
] = true;
5319 } else if ((LHS
== ORHS
&& RHS
== OLHS
) ||
5320 (RHS
== ORHS
&& LHS
== OLHS
)) {
5321 BytesFound
[b
] = true;
5333 unsigned LastB
= 0, BCnt
= 0;
5334 for (unsigned i
= 0; i
< 8; ++i
)
5335 if (BytesFound
[LastB
]) {
5340 if (!LastB
|| BCnt
< 2)
5343 // Because we'll be zero-extending the output anyway if don't have a specific
5344 // value for each input byte (via the Mask), we can 'anyext' the inputs.
5345 if (LHS
.getValueType() != VT
) {
5346 LHS
= CurDAG
->getAnyExtOrTrunc(LHS
, dl
, VT
);
5347 RHS
= CurDAG
->getAnyExtOrTrunc(RHS
, dl
, VT
);
5350 Res
= CurDAG
->getNode(PPCISD::CMPB
, dl
, VT
, LHS
, RHS
);
5352 bool NonTrivialMask
= ((int64_t) Mask
) != INT64_C(-1);
5353 if (NonTrivialMask
&& !Alt
) {
5354 // Res = Mask & CMPB
5355 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
5356 CurDAG
->getConstant(Mask
, dl
, VT
));
5358 // Res = (CMPB & Mask) | (~CMPB & Alt)
5359 // Which, as suggested here:
5360 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
5361 // can be written as:
5362 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
5363 // useful because the (Alt ^ Mask) can be pre-computed.
5364 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
5365 CurDAG
->getConstant(Mask
^ Alt
, dl
, VT
));
5366 Res
= CurDAG
->getNode(ISD::XOR
, dl
, VT
, Res
,
5367 CurDAG
->getConstant(Alt
, dl
, VT
));
5373 // When CR bit registers are enabled, an extension of an i1 variable to a i32
5374 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5375 // involves constant materialization of a 0 or a 1 or both. If the result of
5376 // the extension is then operated upon by some operator that can be constant
5377 // folded with a constant 0 or 1, and that constant can be materialized using
5378 // only one instruction (like a zero or one), then we should fold in those
5379 // operations with the select.
5380 void PPCDAGToDAGISel::foldBoolExts(SDValue
&Res
, SDNode
*&N
) {
5381 if (!PPCSubTarget
->useCRBits())
5384 if (N
->getOpcode() != ISD::ZERO_EXTEND
&&
5385 N
->getOpcode() != ISD::SIGN_EXTEND
&&
5386 N
->getOpcode() != ISD::ANY_EXTEND
)
5389 if (N
->getOperand(0).getValueType() != MVT::i1
)
5392 if (!N
->hasOneUse())
5396 EVT VT
= N
->getValueType(0);
5397 SDValue Cond
= N
->getOperand(0);
5399 CurDAG
->getConstant(N
->getOpcode() == ISD::SIGN_EXTEND
? -1 : 1, dl
, VT
);
5400 SDValue ConstFalse
= CurDAG
->getConstant(0, dl
, VT
);
5403 SDNode
*User
= *N
->use_begin();
5404 if (User
->getNumOperands() != 2)
5407 auto TryFold
= [this, N
, User
, dl
](SDValue Val
) {
5408 SDValue UserO0
= User
->getOperand(0), UserO1
= User
->getOperand(1);
5409 SDValue O0
= UserO0
.getNode() == N
? Val
: UserO0
;
5410 SDValue O1
= UserO1
.getNode() == N
? Val
: UserO1
;
5412 return CurDAG
->FoldConstantArithmetic(User
->getOpcode(), dl
,
5413 User
->getValueType(0),
5414 O0
.getNode(), O1
.getNode());
5417 // FIXME: When the semantics of the interaction between select and undef
5418 // are clearly defined, it may turn out to be unnecessary to break here.
5419 SDValue TrueRes
= TryFold(ConstTrue
);
5420 if (!TrueRes
|| TrueRes
.isUndef())
5422 SDValue FalseRes
= TryFold(ConstFalse
);
5423 if (!FalseRes
|| FalseRes
.isUndef())
5426 // For us to materialize these using one instruction, we must be able to
5427 // represent them as signed 16-bit integers.
5428 uint64_t True
= cast
<ConstantSDNode
>(TrueRes
)->getZExtValue(),
5429 False
= cast
<ConstantSDNode
>(FalseRes
)->getZExtValue();
5430 if (!isInt
<16>(True
) || !isInt
<16>(False
))
5433 // We can replace User with a new SELECT node, and try again to see if we
5434 // can fold the select with its user.
5435 Res
= CurDAG
->getSelect(dl
, User
->getValueType(0), Cond
, TrueRes
, FalseRes
);
5437 ConstTrue
= TrueRes
;
5438 ConstFalse
= FalseRes
;
5439 } while (N
->hasOneUse());
5442 void PPCDAGToDAGISel::PreprocessISelDAG() {
5443 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
5445 bool MadeChange
= false;
5446 while (Position
!= CurDAG
->allnodes_begin()) {
5447 SDNode
*N
= &*--Position
;
5452 switch (N
->getOpcode()) {
5455 Res
= combineToCMPB(N
);
5460 foldBoolExts(Res
, N
);
5463 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
5464 LLVM_DEBUG(N
->dump(CurDAG
));
5465 LLVM_DEBUG(dbgs() << "\nNew: ");
5466 LLVM_DEBUG(Res
.getNode()->dump(CurDAG
));
5467 LLVM_DEBUG(dbgs() << "\n");
5469 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), Res
);
5475 CurDAG
->RemoveDeadNodes();
5478 /// PostprocessISelDAG - Perform some late peephole optimizations
5479 /// on the DAG representation.
5480 void PPCDAGToDAGISel::PostprocessISelDAG() {
5481 // Skip peepholes at -O0.
5482 if (TM
.getOptLevel() == CodeGenOpt::None
)
5487 PeepholePPC64ZExt();
5490 // Check if all users of this node will become isel where the second operand
5491 // is the constant zero. If this is so, and if we can negate the condition,
5492 // then we can flip the true and false operands. This will allow the zero to
5493 // be folded with the isel so that we don't need to materialize a register
5495 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode
*N
) {
5496 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
5499 if (!User
->isMachineOpcode())
5501 if (User
->getMachineOpcode() != PPC::SELECT_I4
&&
5502 User
->getMachineOpcode() != PPC::SELECT_I8
)
5505 SDNode
*Op2
= User
->getOperand(2).getNode();
5506 if (!Op2
->isMachineOpcode())
5509 if (Op2
->getMachineOpcode() != PPC::LI
&&
5510 Op2
->getMachineOpcode() != PPC::LI8
)
5513 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op2
->getOperand(0));
5517 if (!C
->isNullValue())
5524 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode
*N
) {
5525 SmallVector
<SDNode
*, 4> ToReplace
;
5526 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
5529 assert((User
->getMachineOpcode() == PPC::SELECT_I4
||
5530 User
->getMachineOpcode() == PPC::SELECT_I8
) &&
5531 "Must have all select users");
5532 ToReplace
.push_back(User
);
5535 for (SmallVector
<SDNode
*, 4>::iterator UI
= ToReplace
.begin(),
5536 UE
= ToReplace
.end(); UI
!= UE
; ++UI
) {
5539 CurDAG
->getMachineNode(User
->getMachineOpcode(), SDLoc(User
),
5540 User
->getValueType(0), User
->getOperand(0),
5541 User
->getOperand(2),
5542 User
->getOperand(1));
5544 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
5545 LLVM_DEBUG(User
->dump(CurDAG
));
5546 LLVM_DEBUG(dbgs() << "\nNew: ");
5547 LLVM_DEBUG(ResNode
->dump(CurDAG
));
5548 LLVM_DEBUG(dbgs() << "\n");
5550 ReplaceUses(User
, ResNode
);
5554 void PPCDAGToDAGISel::PeepholeCROps() {
5558 for (SDNode
&Node
: CurDAG
->allnodes()) {
5559 MachineSDNode
*MachineNode
= dyn_cast
<MachineSDNode
>(&Node
);
5560 if (!MachineNode
|| MachineNode
->use_empty())
5562 SDNode
*ResNode
= MachineNode
;
5564 bool Op1Set
= false, Op1Unset
= false,
5566 Op2Set
= false, Op2Unset
= false,
5569 unsigned Opcode
= MachineNode
->getMachineOpcode();
5580 SDValue Op
= MachineNode
->getOperand(1);
5581 if (Op
.isMachineOpcode()) {
5582 if (Op
.getMachineOpcode() == PPC::CRSET
)
5584 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
5586 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
5587 Op
.getOperand(0) == Op
.getOperand(1))
5594 case PPC::SELECT_I4
:
5595 case PPC::SELECT_I8
:
5596 case PPC::SELECT_F4
:
5597 case PPC::SELECT_F8
:
5598 case PPC::SELECT_QFRC
:
5599 case PPC::SELECT_QSRC
:
5600 case PPC::SELECT_QBRC
:
5601 case PPC::SELECT_SPE
:
5602 case PPC::SELECT_SPE4
:
5603 case PPC::SELECT_VRRC
:
5604 case PPC::SELECT_VSFRC
:
5605 case PPC::SELECT_VSSRC
:
5606 case PPC::SELECT_VSRC
: {
5607 SDValue Op
= MachineNode
->getOperand(0);
5608 if (Op
.isMachineOpcode()) {
5609 if (Op
.getMachineOpcode() == PPC::CRSET
)
5611 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
5613 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
5614 Op
.getOperand(0) == Op
.getOperand(1))
5621 bool SelectSwap
= false;
5625 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5627 ResNode
= MachineNode
->getOperand(0).getNode();
5630 ResNode
= MachineNode
->getOperand(1).getNode();
5633 ResNode
= MachineNode
->getOperand(0).getNode();
5634 else if (Op1Unset
|| Op2Unset
)
5635 // x & 0 = 0 & y = 0
5636 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5639 // ~x & y = andc(y, x)
5640 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5641 MVT::i1
, MachineNode
->getOperand(1),
5642 MachineNode
->getOperand(0).
5645 // x & ~y = andc(x, y)
5646 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5647 MVT::i1
, MachineNode
->getOperand(0),
5648 MachineNode
->getOperand(1).
5650 else if (AllUsersSelectZero(MachineNode
)) {
5651 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
5652 MVT::i1
, MachineNode
->getOperand(0),
5653 MachineNode
->getOperand(1));
5658 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5659 // nand(x, x) -> nor(x, x)
5660 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5661 MVT::i1
, MachineNode
->getOperand(0),
5662 MachineNode
->getOperand(0));
5664 // nand(1, y) -> nor(y, y)
5665 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5666 MVT::i1
, MachineNode
->getOperand(1),
5667 MachineNode
->getOperand(1));
5669 // nand(x, 1) -> nor(x, x)
5670 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5671 MVT::i1
, MachineNode
->getOperand(0),
5672 MachineNode
->getOperand(0));
5673 else if (Op1Unset
|| Op2Unset
)
5674 // nand(x, 0) = nand(0, y) = 1
5675 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5678 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5679 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5680 MVT::i1
, MachineNode
->getOperand(0).
5682 MachineNode
->getOperand(1));
5684 // nand(x, ~y) = ~x | y = orc(y, x)
5685 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5686 MVT::i1
, MachineNode
->getOperand(1).
5688 MachineNode
->getOperand(0));
5689 else if (AllUsersSelectZero(MachineNode
)) {
5690 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
5691 MVT::i1
, MachineNode
->getOperand(0),
5692 MachineNode
->getOperand(1));
5697 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5699 ResNode
= MachineNode
->getOperand(0).getNode();
5700 else if (Op1Set
|| Op2Set
)
5701 // x | 1 = 1 | y = 1
5702 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5706 ResNode
= MachineNode
->getOperand(1).getNode();
5709 ResNode
= MachineNode
->getOperand(0).getNode();
5711 // ~x | y = orc(y, x)
5712 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5713 MVT::i1
, MachineNode
->getOperand(1),
5714 MachineNode
->getOperand(0).
5717 // x | ~y = orc(x, y)
5718 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5719 MVT::i1
, MachineNode
->getOperand(0),
5720 MachineNode
->getOperand(1).
5722 else if (AllUsersSelectZero(MachineNode
)) {
5723 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5724 MVT::i1
, MachineNode
->getOperand(0),
5725 MachineNode
->getOperand(1));
5730 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5732 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5735 // xor(1, y) -> nor(y, y)
5736 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5737 MVT::i1
, MachineNode
->getOperand(1),
5738 MachineNode
->getOperand(1));
5740 // xor(x, 1) -> nor(x, x)
5741 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5742 MVT::i1
, MachineNode
->getOperand(0),
5743 MachineNode
->getOperand(0));
5746 ResNode
= MachineNode
->getOperand(1).getNode();
5749 ResNode
= MachineNode
->getOperand(0).getNode();
5751 // xor(~x, y) = eqv(x, y)
5752 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5753 MVT::i1
, MachineNode
->getOperand(0).
5755 MachineNode
->getOperand(1));
5757 // xor(x, ~y) = eqv(x, y)
5758 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5759 MVT::i1
, MachineNode
->getOperand(0),
5760 MachineNode
->getOperand(1).
5762 else if (AllUsersSelectZero(MachineNode
)) {
5763 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5764 MVT::i1
, MachineNode
->getOperand(0),
5765 MachineNode
->getOperand(1));
5770 if (Op1Set
|| Op2Set
)
5772 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5775 // nor(0, y) = ~y -> nor(y, y)
5776 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5777 MVT::i1
, MachineNode
->getOperand(1),
5778 MachineNode
->getOperand(1));
5781 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5782 MVT::i1
, MachineNode
->getOperand(0),
5783 MachineNode
->getOperand(0));
5785 // nor(~x, y) = andc(x, y)
5786 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5787 MVT::i1
, MachineNode
->getOperand(0).
5789 MachineNode
->getOperand(1));
5791 // nor(x, ~y) = andc(y, x)
5792 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5793 MVT::i1
, MachineNode
->getOperand(1).
5795 MachineNode
->getOperand(0));
5796 else if (AllUsersSelectZero(MachineNode
)) {
5797 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
5798 MVT::i1
, MachineNode
->getOperand(0),
5799 MachineNode
->getOperand(1));
5804 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5806 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5810 ResNode
= MachineNode
->getOperand(1).getNode();
5813 ResNode
= MachineNode
->getOperand(0).getNode();
5815 // eqv(0, y) = ~y -> nor(y, y)
5816 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5817 MVT::i1
, MachineNode
->getOperand(1),
5818 MachineNode
->getOperand(1));
5821 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5822 MVT::i1
, MachineNode
->getOperand(0),
5823 MachineNode
->getOperand(0));
5825 // eqv(~x, y) = xor(x, y)
5826 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5827 MVT::i1
, MachineNode
->getOperand(0).
5829 MachineNode
->getOperand(1));
5831 // eqv(x, ~y) = xor(x, y)
5832 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5833 MVT::i1
, MachineNode
->getOperand(0),
5834 MachineNode
->getOperand(1).
5836 else if (AllUsersSelectZero(MachineNode
)) {
5837 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5838 MVT::i1
, MachineNode
->getOperand(0),
5839 MachineNode
->getOperand(1));
5844 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5846 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5850 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5851 MVT::i1
, MachineNode
->getOperand(1),
5852 MachineNode
->getOperand(1));
5853 else if (Op1Unset
|| Op2Set
)
5854 // andc(0, y) = andc(x, 1) = 0
5855 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5859 ResNode
= MachineNode
->getOperand(0).getNode();
5861 // andc(~x, y) = ~(x | y) = nor(x, y)
5862 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5863 MVT::i1
, MachineNode
->getOperand(0).
5865 MachineNode
->getOperand(1));
5867 // andc(x, ~y) = x & y
5868 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
5869 MVT::i1
, MachineNode
->getOperand(0),
5870 MachineNode
->getOperand(1).
5872 else if (AllUsersSelectZero(MachineNode
)) {
5873 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5874 MVT::i1
, MachineNode
->getOperand(1),
5875 MachineNode
->getOperand(0));
5880 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5882 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5884 else if (Op1Set
|| Op2Unset
)
5885 // orc(1, y) = orc(x, 0) = 1
5886 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5890 ResNode
= MachineNode
->getOperand(0).getNode();
5893 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5894 MVT::i1
, MachineNode
->getOperand(1),
5895 MachineNode
->getOperand(1));
5897 // orc(~x, y) = ~(x & y) = nand(x, y)
5898 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
5899 MVT::i1
, MachineNode
->getOperand(0).
5901 MachineNode
->getOperand(1));
5903 // orc(x, ~y) = x | y
5904 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
5905 MVT::i1
, MachineNode
->getOperand(0),
5906 MachineNode
->getOperand(1).
5908 else if (AllUsersSelectZero(MachineNode
)) {
5909 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5910 MVT::i1
, MachineNode
->getOperand(1),
5911 MachineNode
->getOperand(0));
5915 case PPC::SELECT_I4
:
5916 case PPC::SELECT_I8
:
5917 case PPC::SELECT_F4
:
5918 case PPC::SELECT_F8
:
5919 case PPC::SELECT_QFRC
:
5920 case PPC::SELECT_QSRC
:
5921 case PPC::SELECT_QBRC
:
5922 case PPC::SELECT_SPE
:
5923 case PPC::SELECT_SPE4
:
5924 case PPC::SELECT_VRRC
:
5925 case PPC::SELECT_VSFRC
:
5926 case PPC::SELECT_VSSRC
:
5927 case PPC::SELECT_VSRC
:
5929 ResNode
= MachineNode
->getOperand(1).getNode();
5931 ResNode
= MachineNode
->getOperand(2).getNode();
5933 ResNode
= CurDAG
->getMachineNode(MachineNode
->getMachineOpcode(),
5935 MachineNode
->getValueType(0),
5936 MachineNode
->getOperand(0).
5938 MachineNode
->getOperand(2),
5939 MachineNode
->getOperand(1));
5944 ResNode
= CurDAG
->getMachineNode(Opcode
== PPC::BC
? PPC::BCn
:
5948 MachineNode
->getOperand(0).
5950 MachineNode
->getOperand(1),
5951 MachineNode
->getOperand(2));
5952 // FIXME: Handle Op1Set, Op1Unset here too.
5956 // If we're inverting this node because it is used only by selects that
5957 // we'd like to swap, then swap the selects before the node replacement.
5959 SwapAllSelectUsers(MachineNode
);
5961 if (ResNode
!= MachineNode
) {
5962 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
5963 LLVM_DEBUG(MachineNode
->dump(CurDAG
));
5964 LLVM_DEBUG(dbgs() << "\nNew: ");
5965 LLVM_DEBUG(ResNode
->dump(CurDAG
));
5966 LLVM_DEBUG(dbgs() << "\n");
5968 ReplaceUses(MachineNode
, ResNode
);
5973 CurDAG
->RemoveDeadNodes();
5974 } while (IsModified
);
5977 // Gather the set of 32-bit operations that are known to have their
5978 // higher-order 32 bits zero, where ToPromote contains all such operations.
5979 static bool PeepholePPC64ZExtGather(SDValue Op32
,
5980 SmallPtrSetImpl
<SDNode
*> &ToPromote
) {
5981 if (!Op32
.isMachineOpcode())
5984 // First, check for the "frontier" instructions (those that will clear the
5985 // higher-order 32 bits.
5987 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
5988 // around. If it does not, then these instructions will clear the
5989 // higher-order bits.
5990 if ((Op32
.getMachineOpcode() == PPC::RLWINM
||
5991 Op32
.getMachineOpcode() == PPC::RLWNM
) &&
5992 Op32
.getConstantOperandVal(2) <= Op32
.getConstantOperandVal(3)) {
5993 ToPromote
.insert(Op32
.getNode());
5997 // SLW and SRW always clear the higher-order bits.
5998 if (Op32
.getMachineOpcode() == PPC::SLW
||
5999 Op32
.getMachineOpcode() == PPC::SRW
) {
6000 ToPromote
.insert(Op32
.getNode());
6004 // For LI and LIS, we need the immediate to be positive (so that it is not
6006 if (Op32
.getMachineOpcode() == PPC::LI
||
6007 Op32
.getMachineOpcode() == PPC::LIS
) {
6008 if (!isUInt
<15>(Op32
.getConstantOperandVal(0)))
6011 ToPromote
.insert(Op32
.getNode());
6015 // LHBRX and LWBRX always clear the higher-order bits.
6016 if (Op32
.getMachineOpcode() == PPC::LHBRX
||
6017 Op32
.getMachineOpcode() == PPC::LWBRX
) {
6018 ToPromote
.insert(Op32
.getNode());
6022 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
6023 if (Op32
.getMachineOpcode() == PPC::CNTLZW
||
6024 Op32
.getMachineOpcode() == PPC::CNTTZW
) {
6025 ToPromote
.insert(Op32
.getNode());
6029 // Next, check for those instructions we can look through.
6031 // Assuming the mask does not wrap around, then the higher-order bits are
6032 // taken directly from the first operand.
6033 if (Op32
.getMachineOpcode() == PPC::RLWIMI
&&
6034 Op32
.getConstantOperandVal(3) <= Op32
.getConstantOperandVal(4)) {
6035 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6036 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
6039 ToPromote
.insert(Op32
.getNode());
6040 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6044 // For OR, the higher-order bits are zero if that is true for both operands.
6045 // For SELECT_I4, the same is true (but the relevant operand numbers are
6047 if (Op32
.getMachineOpcode() == PPC::OR
||
6048 Op32
.getMachineOpcode() == PPC::SELECT_I4
) {
6049 unsigned B
= Op32
.getMachineOpcode() == PPC::SELECT_I4
? 1 : 0;
6050 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6051 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+0), ToPromote1
))
6053 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+1), ToPromote1
))
6056 ToPromote
.insert(Op32
.getNode());
6057 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6061 // For ORI and ORIS, we need the higher-order bits of the first operand to be
6062 // zero, and also for the constant to be positive (so that it is not sign
6064 if (Op32
.getMachineOpcode() == PPC::ORI
||
6065 Op32
.getMachineOpcode() == PPC::ORIS
) {
6066 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6067 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
6069 if (!isUInt
<15>(Op32
.getConstantOperandVal(1)))
6072 ToPromote
.insert(Op32
.getNode());
6073 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6077 // The higher-order bits of AND are zero if that is true for at least one of
6079 if (Op32
.getMachineOpcode() == PPC::AND
) {
6080 SmallPtrSet
<SDNode
*, 16> ToPromote1
, ToPromote2
;
6082 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
6084 PeepholePPC64ZExtGather(Op32
.getOperand(1), ToPromote2
);
6085 if (!Op0OK
&& !Op1OK
)
6088 ToPromote
.insert(Op32
.getNode());
6091 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6094 ToPromote
.insert(ToPromote2
.begin(), ToPromote2
.end());
6099 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
6100 // of the first operand, or if the second operand is positive (so that it is
6101 // not sign extended).
6102 if (Op32
.getMachineOpcode() == PPC::ANDIo
||
6103 Op32
.getMachineOpcode() == PPC::ANDISo
) {
6104 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6106 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
6107 bool Op1OK
= isUInt
<15>(Op32
.getConstantOperandVal(1));
6108 if (!Op0OK
&& !Op1OK
)
6111 ToPromote
.insert(Op32
.getNode());
6114 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6122 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
6123 if (!PPCSubTarget
->isPPC64())
6126 // When we zero-extend from i32 to i64, we use a pattern like this:
6127 // def : Pat<(i64 (zext i32:$in)),
6128 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
6130 // There are several 32-bit shift/rotate instructions, however, that will
6131 // clear the higher-order bits of their output, rendering the RLDICL
6132 // unnecessary. When that happens, we remove it here, and redefine the
6133 // relevant 32-bit operation to be a 64-bit operation.
6135 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
6137 bool MadeChange
= false;
6138 while (Position
!= CurDAG
->allnodes_begin()) {
6139 SDNode
*N
= &*--Position
;
6140 // Skip dead nodes and any non-machine opcodes.
6141 if (N
->use_empty() || !N
->isMachineOpcode())
6144 if (N
->getMachineOpcode() != PPC::RLDICL
)
6147 if (N
->getConstantOperandVal(1) != 0 ||
6148 N
->getConstantOperandVal(2) != 32)
6151 SDValue ISR
= N
->getOperand(0);
6152 if (!ISR
.isMachineOpcode() ||
6153 ISR
.getMachineOpcode() != TargetOpcode::INSERT_SUBREG
)
6156 if (!ISR
.hasOneUse())
6159 if (ISR
.getConstantOperandVal(2) != PPC::sub_32
)
6162 SDValue IDef
= ISR
.getOperand(0);
6163 if (!IDef
.isMachineOpcode() ||
6164 IDef
.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF
)
6167 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
6168 // can get rid of it.
6170 SDValue Op32
= ISR
->getOperand(1);
6171 if (!Op32
.isMachineOpcode())
6174 // There are some 32-bit instructions that always clear the high-order 32
6175 // bits, there are also some instructions (like AND) that we can look
6177 SmallPtrSet
<SDNode
*, 16> ToPromote
;
6178 if (!PeepholePPC64ZExtGather(Op32
, ToPromote
))
6181 // If the ToPromote set contains nodes that have uses outside of the set
6182 // (except for the original INSERT_SUBREG), then abort the transformation.
6183 bool OutsideUse
= false;
6184 for (SDNode
*PN
: ToPromote
) {
6185 for (SDNode
*UN
: PN
->uses()) {
6186 if (!ToPromote
.count(UN
) && UN
!= ISR
.getNode()) {
6200 // We now know that this zero extension can be removed by promoting to
6201 // nodes in ToPromote to 64-bit operations, where for operations in the
6202 // frontier of the set, we need to insert INSERT_SUBREGs for their
6204 for (SDNode
*PN
: ToPromote
) {
6206 switch (PN
->getMachineOpcode()) {
6208 llvm_unreachable("Don't know the 64-bit variant of this instruction");
6209 case PPC::RLWINM
: NewOpcode
= PPC::RLWINM8
; break;
6210 case PPC::RLWNM
: NewOpcode
= PPC::RLWNM8
; break;
6211 case PPC::SLW
: NewOpcode
= PPC::SLW8
; break;
6212 case PPC::SRW
: NewOpcode
= PPC::SRW8
; break;
6213 case PPC::LI
: NewOpcode
= PPC::LI8
; break;
6214 case PPC::LIS
: NewOpcode
= PPC::LIS8
; break;
6215 case PPC::LHBRX
: NewOpcode
= PPC::LHBRX8
; break;
6216 case PPC::LWBRX
: NewOpcode
= PPC::LWBRX8
; break;
6217 case PPC::CNTLZW
: NewOpcode
= PPC::CNTLZW8
; break;
6218 case PPC::CNTTZW
: NewOpcode
= PPC::CNTTZW8
; break;
6219 case PPC::RLWIMI
: NewOpcode
= PPC::RLWIMI8
; break;
6220 case PPC::OR
: NewOpcode
= PPC::OR8
; break;
6221 case PPC::SELECT_I4
: NewOpcode
= PPC::SELECT_I8
; break;
6222 case PPC::ORI
: NewOpcode
= PPC::ORI8
; break;
6223 case PPC::ORIS
: NewOpcode
= PPC::ORIS8
; break;
6224 case PPC::AND
: NewOpcode
= PPC::AND8
; break;
6225 case PPC::ANDIo
: NewOpcode
= PPC::ANDIo8
; break;
6226 case PPC::ANDISo
: NewOpcode
= PPC::ANDISo8
; break;
6229 // Note: During the replacement process, the nodes will be in an
6230 // inconsistent state (some instructions will have operands with values
6231 // of the wrong type). Once done, however, everything should be right
6234 SmallVector
<SDValue
, 4> Ops
;
6235 for (const SDValue
&V
: PN
->ops()) {
6236 if (!ToPromote
.count(V
.getNode()) && V
.getValueType() == MVT::i32
&&
6237 !isa
<ConstantSDNode
>(V
)) {
6238 SDValue ReplOpOps
[] = { ISR
.getOperand(0), V
, ISR
.getOperand(2) };
6240 CurDAG
->getMachineNode(TargetOpcode::INSERT_SUBREG
, SDLoc(V
),
6241 ISR
.getNode()->getVTList(), ReplOpOps
);
6242 Ops
.push_back(SDValue(ReplOp
, 0));
6248 // Because all to-be-promoted nodes only have users that are other
6249 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
6250 // the i32 result value type with i64.
6252 SmallVector
<EVT
, 2> NewVTs
;
6253 SDVTList VTs
= PN
->getVTList();
6254 for (unsigned i
= 0, ie
= VTs
.NumVTs
; i
!= ie
; ++i
)
6255 if (VTs
.VTs
[i
] == MVT::i32
)
6256 NewVTs
.push_back(MVT::i64
);
6258 NewVTs
.push_back(VTs
.VTs
[i
]);
6260 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
6261 LLVM_DEBUG(PN
->dump(CurDAG
));
6263 CurDAG
->SelectNodeTo(PN
, NewOpcode
, CurDAG
->getVTList(NewVTs
), Ops
);
6265 LLVM_DEBUG(dbgs() << "\nNew: ");
6266 LLVM_DEBUG(PN
->dump(CurDAG
));
6267 LLVM_DEBUG(dbgs() << "\n");
6270 // Now we replace the original zero extend and its associated INSERT_SUBREG
6271 // with the value feeding the INSERT_SUBREG (which has now been promoted to
6274 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
6275 LLVM_DEBUG(N
->dump(CurDAG
));
6276 LLVM_DEBUG(dbgs() << "\nNew: ");
6277 LLVM_DEBUG(Op32
.getNode()->dump(CurDAG
));
6278 LLVM_DEBUG(dbgs() << "\n");
6280 ReplaceUses(N
, Op32
.getNode());
6284 CurDAG
->RemoveDeadNodes();
6287 void PPCDAGToDAGISel::PeepholePPC64() {
6288 // These optimizations are currently supported only for 64-bit SVR4.
6289 if (PPCSubTarget
->isDarwin() || !PPCSubTarget
->isPPC64())
6292 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
6294 while (Position
!= CurDAG
->allnodes_begin()) {
6295 SDNode
*N
= &*--Position
;
6296 // Skip dead nodes and any non-machine opcodes.
6297 if (N
->use_empty() || !N
->isMachineOpcode())
6301 unsigned StorageOpcode
= N
->getMachineOpcode();
6302 bool RequiresMod4Offset
= false;
6304 switch (StorageOpcode
) {
6309 case PPC::DFLOADf64
:
6310 case PPC::DFLOADf32
:
6311 RequiresMod4Offset
= true;
6327 case PPC::DFSTOREf64
:
6328 case PPC::DFSTOREf32
:
6329 RequiresMod4Offset
= true;
6343 // If this is a load or store with a zero offset, or within the alignment,
6344 // we may be able to fold an add-immediate into the memory operation.
6345 // The check against alignment is below, as it can't occur until we check
6346 // the arguments to N
6347 if (!isa
<ConstantSDNode
>(N
->getOperand(FirstOp
)))
6350 SDValue Base
= N
->getOperand(FirstOp
+ 1);
6351 if (!Base
.isMachineOpcode())
6355 bool ReplaceFlags
= true;
6357 // When the feeding operation is an add-immediate of some sort,
6358 // determine whether we need to add relocation information to the
6359 // target flags on the immediate operand when we fold it into the
6360 // load instruction.
6362 // For something like ADDItocL, the relocation information is
6363 // inferred from the opcode; when we process it in the AsmPrinter,
6364 // we add the necessary relocation there. A load, though, can receive
6365 // relocation from various flavors of ADDIxxx, so we need to carry
6366 // the relocation information in the target flags.
6367 switch (Base
.getMachineOpcode()) {
6372 // In some cases (such as TLS) the relocation information
6373 // is already in place on the operand, so copying the operand
6375 ReplaceFlags
= false;
6376 // For these cases, the immediate may not be divisible by 4, in
6377 // which case the fold is illegal for DS-form instructions. (The
6378 // other cases provide aligned addresses and are always safe.)
6379 if (RequiresMod4Offset
&&
6380 (!isa
<ConstantSDNode
>(Base
.getOperand(1)) ||
6381 Base
.getConstantOperandVal(1) % 4 != 0))
6384 case PPC::ADDIdtprelL
:
6385 Flags
= PPCII::MO_DTPREL_LO
;
6387 case PPC::ADDItlsldL
:
6388 Flags
= PPCII::MO_TLSLD_LO
;
6391 Flags
= PPCII::MO_TOC_LO
;
6395 SDValue ImmOpnd
= Base
.getOperand(1);
6397 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
6398 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
6399 // we might have needed different @ha relocation values for the offset
6401 int MaxDisplacement
= 7;
6402 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
6403 const GlobalValue
*GV
= GA
->getGlobal();
6404 MaxDisplacement
= std::min((int) GV
->getAlignment() - 1, MaxDisplacement
);
6407 bool UpdateHBase
= false;
6408 SDValue HBase
= Base
.getOperand(0);
6410 int Offset
= N
->getConstantOperandVal(FirstOp
);
6412 if (Offset
< 0 || Offset
> MaxDisplacement
) {
6413 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
6414 // one use, then we can do this for any offset, we just need to also
6415 // update the offset (i.e. the symbol addend) on the addis also.
6416 if (Base
.getMachineOpcode() != PPC::ADDItocL
)
6419 if (!HBase
.isMachineOpcode() ||
6420 HBase
.getMachineOpcode() != PPC::ADDIStocHA
)
6423 if (!Base
.hasOneUse() || !HBase
.hasOneUse())
6426 SDValue HImmOpnd
= HBase
.getOperand(1);
6427 if (HImmOpnd
!= ImmOpnd
)
6433 // If we're directly folding the addend from an addi instruction, then:
6434 // 1. In general, the offset on the memory access must be zero.
6435 // 2. If the addend is a constant, then it can be combined with a
6436 // non-zero offset, but only if the result meets the encoding
6438 if (auto *C
= dyn_cast
<ConstantSDNode
>(ImmOpnd
)) {
6439 Offset
+= C
->getSExtValue();
6441 if (RequiresMod4Offset
&& (Offset
% 4) != 0)
6444 if (!isInt
<16>(Offset
))
6447 ImmOpnd
= CurDAG
->getTargetConstant(Offset
, SDLoc(ImmOpnd
),
6448 ImmOpnd
.getValueType());
6449 } else if (Offset
!= 0) {
6454 // We found an opportunity. Reverse the operands from the add
6455 // immediate and substitute them into the load or store. If
6456 // needed, update the target flags for the immediate operand to
6457 // reflect the necessary relocation information.
6458 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
6459 LLVM_DEBUG(Base
->dump(CurDAG
));
6460 LLVM_DEBUG(dbgs() << "\nN: ");
6461 LLVM_DEBUG(N
->dump(CurDAG
));
6462 LLVM_DEBUG(dbgs() << "\n");
6464 // If the relocation information isn't already present on the
6465 // immediate operand, add it now.
6467 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
6469 const GlobalValue
*GV
= GA
->getGlobal();
6470 // We can't perform this optimization for data whose alignment
6471 // is insufficient for the instruction encoding.
6472 if (GV
->getAlignment() < 4 &&
6473 (RequiresMod4Offset
|| (Offset
% 4) != 0)) {
6474 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
6477 ImmOpnd
= CurDAG
->getTargetGlobalAddress(GV
, dl
, MVT::i64
, Offset
, Flags
);
6478 } else if (ConstantPoolSDNode
*CP
=
6479 dyn_cast
<ConstantPoolSDNode
>(ImmOpnd
)) {
6480 const Constant
*C
= CP
->getConstVal();
6481 ImmOpnd
= CurDAG
->getTargetConstantPool(C
, MVT::i64
,
6487 if (FirstOp
== 1) // Store
6488 (void)CurDAG
->UpdateNodeOperands(N
, N
->getOperand(0), ImmOpnd
,
6489 Base
.getOperand(0), N
->getOperand(3));
6491 (void)CurDAG
->UpdateNodeOperands(N
, ImmOpnd
, Base
.getOperand(0),
6495 (void)CurDAG
->UpdateNodeOperands(HBase
.getNode(), HBase
.getOperand(0),
6498 // The add-immediate may now be dead, in which case remove it.
6499 if (Base
.getNode()->use_empty())
6500 CurDAG
->RemoveDeadNode(Base
.getNode());
6504 /// createPPCISelDag - This pass converts a legalized DAG into a
6505 /// PowerPC-specific DAG, ready for instruction scheduling.
6507 FunctionPass
*llvm::createPPCISelDag(PPCTargetMachine
&TM
,
6508 CodeGenOpt::Level OptLevel
) {
6509 return new PPCDAGToDAGISel(TM
, OptLevel
);