1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines a pattern matching instruction selector for PowerPC,
10 // converting from a legalized dag to a PPC dag.
12 //===----------------------------------------------------------------------===//
14 #include "MCTargetDesc/PPCMCTargetDesc.h"
15 #include "MCTargetDesc/PPCPredicates.h"
17 #include "PPCISelLowering.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCSubtarget.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/BranchProbabilityInfo.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/SelectionDAG.h"
35 #include "llvm/CodeGen/SelectionDAGISel.h"
36 #include "llvm/CodeGen/SelectionDAGNodes.h"
37 #include "llvm/CodeGen/TargetInstrInfo.h"
38 #include "llvm/CodeGen/TargetRegisterInfo.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalValue.h"
44 #include "llvm/IR/InlineAsm.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/CodeGen.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/ErrorHandling.h"
53 #include "llvm/Support/KnownBits.h"
54 #include "llvm/Support/MachineValueType.h"
55 #include "llvm/Support/MathExtras.h"
56 #include "llvm/Support/raw_ostream.h"
69 #define DEBUG_TYPE "ppc-codegen"
71 STATISTIC(NumSextSetcc
,
72 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
73 STATISTIC(NumZextSetcc
,
74 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
75 STATISTIC(SignExtensionsAdded
,
76 "Number of sign extensions for compare inputs added.");
77 STATISTIC(ZeroExtensionsAdded
,
78 "Number of zero extensions for compare inputs added.");
79 STATISTIC(NumLogicOpsOnComparison
,
80 "Number of logical ops on i1 values calculated in GPR.");
81 STATISTIC(OmittedForNonExtendUses
,
82 "Number of compares not eliminated as they have non-extending uses.");
84 "Number of compares lowered to setb.");
86 // FIXME: Remove this once the bug has been fixed!
87 cl::opt
<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
88 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden
);
91 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
92 cl::desc("use aggressive ppc isel for bit permutations"),
94 static cl::opt
<bool> BPermRewriterNoMasking(
95 "ppc-bit-perm-rewriter-stress-rotates",
96 cl::desc("stress rotate selection in aggressive ppc isel for "
100 static cl::opt
<bool> EnableBranchHint(
101 "ppc-use-branch-hint", cl::init(true),
102 cl::desc("Enable static hinting of branches on ppc"),
105 static cl::opt
<bool> EnableTLSOpt(
106 "ppc-tls-opt", cl::init(true),
107 cl::desc("Enable tls optimization peephole"),
110 enum ICmpInGPRType
{ ICGPR_All
, ICGPR_None
, ICGPR_I32
, ICGPR_I64
,
111 ICGPR_NonExtIn
, ICGPR_Zext
, ICGPR_Sext
, ICGPR_ZextI32
,
112 ICGPR_SextI32
, ICGPR_ZextI64
, ICGPR_SextI64
};
114 static cl::opt
<ICmpInGPRType
> CmpInGPR(
115 "ppc-gpr-icmps", cl::Hidden
, cl::init(ICGPR_All
),
116 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
117 cl::values(clEnumValN(ICGPR_None
, "none", "Do not modify integer comparisons."),
118 clEnumValN(ICGPR_All
, "all", "All possible int comparisons in GPRs."),
119 clEnumValN(ICGPR_I32
, "i32", "Only i32 comparisons in GPRs."),
120 clEnumValN(ICGPR_I64
, "i64", "Only i64 comparisons in GPRs."),
121 clEnumValN(ICGPR_NonExtIn
, "nonextin",
122 "Only comparisons where inputs don't need [sz]ext."),
123 clEnumValN(ICGPR_Zext
, "zext", "Only comparisons with zext result."),
124 clEnumValN(ICGPR_ZextI32
, "zexti32",
125 "Only i32 comparisons with zext result."),
126 clEnumValN(ICGPR_ZextI64
, "zexti64",
127 "Only i64 comparisons with zext result."),
128 clEnumValN(ICGPR_Sext
, "sext", "Only comparisons with sext result."),
129 clEnumValN(ICGPR_SextI32
, "sexti32",
130 "Only i32 comparisons with sext result."),
131 clEnumValN(ICGPR_SextI64
, "sexti64",
132 "Only i64 comparisons with sext result.")));
135 //===--------------------------------------------------------------------===//
136 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
137 /// instructions for SelectionDAG operations.
139 class PPCDAGToDAGISel
: public SelectionDAGISel
{
140 const PPCTargetMachine
&TM
;
141 const PPCSubtarget
*PPCSubTarget
;
142 const PPCTargetLowering
*PPCLowering
;
143 unsigned GlobalBaseReg
;
146 explicit PPCDAGToDAGISel(PPCTargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
147 : SelectionDAGISel(tm
, OptLevel
), TM(tm
) {}
149 bool runOnMachineFunction(MachineFunction
&MF
) override
{
150 // Make sure we re-emit a set of the global base reg if necessary
152 PPCSubTarget
= &MF
.getSubtarget
<PPCSubtarget
>();
153 PPCLowering
= PPCSubTarget
->getTargetLowering();
154 SelectionDAGISel::runOnMachineFunction(MF
);
156 if (!PPCSubTarget
->isSVR4ABI())
157 InsertVRSaveCode(MF
);
162 void PreprocessISelDAG() override
;
163 void PostprocessISelDAG() override
;
165 /// getI16Imm - Return a target constant with the specified value, of type
167 inline SDValue
getI16Imm(unsigned Imm
, const SDLoc
&dl
) {
168 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i16
);
171 /// getI32Imm - Return a target constant with the specified value, of type
173 inline SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
174 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
177 /// getI64Imm - Return a target constant with the specified value, of type
179 inline SDValue
getI64Imm(uint64_t Imm
, const SDLoc
&dl
) {
180 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i64
);
183 /// getSmallIPtrImm - Return a target constant of pointer type.
184 inline SDValue
getSmallIPtrImm(unsigned Imm
, const SDLoc
&dl
) {
185 return CurDAG
->getTargetConstant(
186 Imm
, dl
, PPCLowering
->getPointerTy(CurDAG
->getDataLayout()));
189 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
190 /// rotate and mask opcode and mask operation.
191 static bool isRotateAndMask(SDNode
*N
, unsigned Mask
, bool isShiftMask
,
192 unsigned &SH
, unsigned &MB
, unsigned &ME
);
194 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
195 /// base register. Return the virtual register that holds this value.
196 SDNode
*getGlobalBaseReg();
198 void selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
= 0);
200 // Select - Convert the specified operand from a target-independent to a
201 // target-specific node if it hasn't already been changed.
202 void Select(SDNode
*N
) override
;
204 bool tryBitfieldInsert(SDNode
*N
);
205 bool tryBitPermutation(SDNode
*N
);
206 bool tryIntCompareInGPR(SDNode
*N
);
208 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
209 // an X-Form load instruction with the offset being a relocation coming from
210 // the PPCISD::ADD_TLS.
211 bool tryTLSXFormLoad(LoadSDNode
*N
);
212 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
213 // an X-Form store instruction with the offset being a relocation coming from
214 // the PPCISD::ADD_TLS.
215 bool tryTLSXFormStore(StoreSDNode
*N
);
216 /// SelectCC - Select a comparison of the specified values with the
217 /// specified condition code, returning the CR# of the expression.
218 SDValue
SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
221 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
222 /// immediate field. Note that the operand at this point is already the
223 /// result of a prior SelectAddressRegImm call.
224 bool SelectAddrImmOffs(SDValue N
, SDValue
&Out
) const {
225 if (N
.getOpcode() == ISD::TargetConstant
||
226 N
.getOpcode() == ISD::TargetGlobalAddress
) {
234 /// SelectAddrIdx - Given the specified address, check to see if it can be
235 /// represented as an indexed [r+r] operation.
236 /// This is for xform instructions whose associated displacement form is D.
237 /// The last parameter \p 0 means associated D form has no requirment for 16
238 /// bit signed displacement.
239 /// Returns false if it can be represented by [r+imm], which are preferred.
240 bool SelectAddrIdx(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
241 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
, 0);
244 /// SelectAddrIdx4 - Given the specified address, check to see if it can be
245 /// represented as an indexed [r+r] operation.
246 /// This is for xform instructions whose associated displacement form is DS.
247 /// The last parameter \p 4 means associated DS form 16 bit signed
248 /// displacement must be a multiple of 4.
249 /// Returns false if it can be represented by [r+imm], which are preferred.
250 bool SelectAddrIdxX4(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
251 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
, 4);
254 /// SelectAddrIdx16 - Given the specified address, check to see if it can be
255 /// represented as an indexed [r+r] operation.
256 /// This is for xform instructions whose associated displacement form is DQ.
257 /// The last parameter \p 16 means associated DQ form 16 bit signed
258 /// displacement must be a multiple of 16.
259 /// Returns false if it can be represented by [r+imm], which are preferred.
260 bool SelectAddrIdxX16(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
261 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
, 16);
264 /// SelectAddrIdxOnly - Given the specified address, force it to be
265 /// represented as an indexed [r+r] operation.
266 bool SelectAddrIdxOnly(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
267 return PPCLowering
->SelectAddressRegRegOnly(N
, Base
, Index
, *CurDAG
);
270 /// SelectAddrImm - Returns true if the address N can be represented by
271 /// a base register plus a signed 16-bit displacement [r+imm].
272 /// The last parameter \p 0 means D form has no requirement for 16 bit signed
274 bool SelectAddrImm(SDValue N
, SDValue
&Disp
,
276 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 0);
279 /// SelectAddrImmX4 - Returns true if the address N can be represented by
280 /// a base register plus a signed 16-bit displacement that is a multiple of
281 /// 4 (last parameter). Suitable for use by STD and friends.
282 bool SelectAddrImmX4(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
283 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 4);
286 /// SelectAddrImmX16 - Returns true if the address N can be represented by
287 /// a base register plus a signed 16-bit displacement that is a multiple of
288 /// 16(last parameter). Suitable for use by STXV and friends.
289 bool SelectAddrImmX16(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
290 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 16);
293 // Select an address into a single register.
294 bool SelectAddr(SDValue N
, SDValue
&Base
) {
299 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
300 /// inline asm expressions. It is always correct to compute the value into
301 /// a register. The case of adding a (possibly relocatable) constant to a
302 /// register can be improved, but it is wrong to substitute Reg+Reg for
303 /// Reg in an asm, because the load or store opcode would have to change.
304 bool SelectInlineAsmMemoryOperand(const SDValue
&Op
,
305 unsigned ConstraintID
,
306 std::vector
<SDValue
> &OutOps
) override
{
307 switch(ConstraintID
) {
309 errs() << "ConstraintID: " << ConstraintID
<< "\n";
310 llvm_unreachable("Unexpected asm memory constraint");
311 case InlineAsm::Constraint_es
:
312 case InlineAsm::Constraint_i
:
313 case InlineAsm::Constraint_m
:
314 case InlineAsm::Constraint_o
:
315 case InlineAsm::Constraint_Q
:
316 case InlineAsm::Constraint_Z
:
317 case InlineAsm::Constraint_Zy
:
318 // We need to make sure that this one operand does not end up in r0
319 // (because we might end up lowering this as 0(%op)).
320 const TargetRegisterInfo
*TRI
= PPCSubTarget
->getRegisterInfo();
321 const TargetRegisterClass
*TRC
= TRI
->getPointerRegClass(*MF
, /*Kind=*/1);
323 SDValue RC
= CurDAG
->getTargetConstant(TRC
->getID(), dl
, MVT::i32
);
325 SDValue(CurDAG
->getMachineNode(TargetOpcode::COPY_TO_REGCLASS
,
326 dl
, Op
.getValueType(),
329 OutOps
.push_back(NewOp
);
335 void InsertVRSaveCode(MachineFunction
&MF
);
337 StringRef
getPassName() const override
{
338 return "PowerPC DAG->DAG Pattern Instruction Selection";
341 // Include the pieces autogenerated from the target description.
342 #include "PPCGenDAGISel.inc"
345 bool trySETCC(SDNode
*N
);
347 void PeepholePPC64();
348 void PeepholePPC64ZExt();
349 void PeepholeCROps();
351 SDValue
combineToCMPB(SDNode
*N
);
352 void foldBoolExts(SDValue
&Res
, SDNode
*&N
);
354 bool AllUsersSelectZero(SDNode
*N
);
355 void SwapAllSelectUsers(SDNode
*N
);
357 bool isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const;
358 void transferMemOperands(SDNode
*N
, SDNode
*Result
);
361 } // end anonymous namespace
363 /// InsertVRSaveCode - Once the entire function has been instruction selected,
364 /// all virtual registers are created and all machine instructions are built,
365 /// check to see if we need to save/restore VRSAVE. If so, do it.
366 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction
&Fn
) {
367 // Check to see if this function uses vector registers, which means we have to
368 // save and restore the VRSAVE register and update it with the regs we use.
370 // In this case, there will be virtual registers of vector type created
371 // by the scheduler. Detect them now.
372 bool HasVectorVReg
= false;
373 for (unsigned i
= 0, e
= RegInfo
->getNumVirtRegs(); i
!= e
; ++i
) {
374 unsigned Reg
= TargetRegisterInfo::index2VirtReg(i
);
375 if (RegInfo
->getRegClass(Reg
) == &PPC::VRRCRegClass
) {
376 HasVectorVReg
= true;
380 if (!HasVectorVReg
) return; // nothing to do.
382 // If we have a vector register, we want to emit code into the entry and exit
383 // blocks to save and restore the VRSAVE register. We do this here (instead
384 // of marking all vector instructions as clobbering VRSAVE) for two reasons:
386 // 1. This (trivially) reduces the load on the register allocator, by not
387 // having to represent the live range of the VRSAVE register.
388 // 2. This (more significantly) allows us to create a temporary virtual
389 // register to hold the saved VRSAVE value, allowing this temporary to be
390 // register allocated, instead of forcing it to be spilled to the stack.
392 // Create two vregs - one to hold the VRSAVE register that is live-in to the
393 // function and one for the value after having bits or'd into it.
394 unsigned InVRSAVE
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
395 unsigned UpdatedVRSAVE
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
397 const TargetInstrInfo
&TII
= *PPCSubTarget
->getInstrInfo();
398 MachineBasicBlock
&EntryBB
= *Fn
.begin();
400 // Emit the following code into the entry block:
401 // InVRSAVE = MFVRSAVE
402 // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
403 // MTVRSAVE UpdatedVRSAVE
404 MachineBasicBlock::iterator IP
= EntryBB
.begin(); // Insert Point
405 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::MFVRSAVE
), InVRSAVE
);
406 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::UPDATE_VRSAVE
),
407 UpdatedVRSAVE
).addReg(InVRSAVE
);
408 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::MTVRSAVE
)).addReg(UpdatedVRSAVE
);
410 // Find all return blocks, outputting a restore in each epilog.
411 for (MachineFunction::iterator BB
= Fn
.begin(), E
= Fn
.end(); BB
!= E
; ++BB
) {
412 if (BB
->isReturnBlock()) {
413 IP
= BB
->end(); --IP
;
415 // Skip over all terminator instructions, which are part of the return
417 MachineBasicBlock::iterator I2
= IP
;
418 while (I2
!= BB
->begin() && (--I2
)->isTerminator())
421 // Emit: MTVRSAVE InVRSave
422 BuildMI(*BB
, IP
, dl
, TII
.get(PPC::MTVRSAVE
)).addReg(InVRSAVE
);
427 /// getGlobalBaseReg - Output the instructions required to put the
428 /// base address to use for accessing globals into a register.
430 SDNode
*PPCDAGToDAGISel::getGlobalBaseReg() {
431 if (!GlobalBaseReg
) {
432 const TargetInstrInfo
&TII
= *PPCSubTarget
->getInstrInfo();
433 // Insert the set of GlobalBaseReg into the first MBB of the function
434 MachineBasicBlock
&FirstMBB
= MF
->front();
435 MachineBasicBlock::iterator MBBI
= FirstMBB
.begin();
436 const Module
*M
= MF
->getFunction().getParent();
439 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) == MVT::i32
) {
440 if (PPCSubTarget
->isTargetELF()) {
441 GlobalBaseReg
= PPC::R30
;
442 if (!PPCSubTarget
->isSecurePlt() &&
443 M
->getPICLevel() == PICLevel::SmallPIC
) {
444 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MoveGOTtoLR
));
445 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
446 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
448 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
449 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
450 unsigned TempReg
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
451 BuildMI(FirstMBB
, MBBI
, dl
,
452 TII
.get(PPC::UpdateGBR
), GlobalBaseReg
)
453 .addReg(TempReg
, RegState::Define
).addReg(GlobalBaseReg
);
454 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
458 RegInfo
->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass
);
459 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
460 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
463 // We must ensure that this sequence is dominated by the prologue.
464 // FIXME: This is a bit of a big hammer since we don't get the benefits
465 // of shrink-wrapping whenever we emit this instruction. Considering
466 // this is used in any function where we emit a jump table, this may be
467 // a significant limitation. We should consider inserting this in the
468 // block where it is used and then commoning this sequence up if it
469 // appears in multiple places.
470 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
472 MF
->getInfo
<PPCFunctionInfo
>()->setShrinkWrapDisabled(true);
473 GlobalBaseReg
= RegInfo
->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass
);
474 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR8
));
475 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR8
), GlobalBaseReg
);
478 return CurDAG
->getRegister(GlobalBaseReg
,
479 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()))
483 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
484 /// operand. If so Imm will receive the 32-bit value.
485 static bool isInt32Immediate(SDNode
*N
, unsigned &Imm
) {
486 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i32
) {
487 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
493 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
494 /// operand. If so Imm will receive the 64-bit value.
495 static bool isInt64Immediate(SDNode
*N
, uint64_t &Imm
) {
496 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i64
) {
497 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
503 // isInt32Immediate - This method tests to see if a constant operand.
504 // If so Imm will receive the 32 bit value.
505 static bool isInt32Immediate(SDValue N
, unsigned &Imm
) {
506 return isInt32Immediate(N
.getNode(), Imm
);
509 /// isInt64Immediate - This method tests to see if the value is a 64-bit
510 /// constant operand. If so Imm will receive the 64-bit value.
511 static bool isInt64Immediate(SDValue N
, uint64_t &Imm
) {
512 return isInt64Immediate(N
.getNode(), Imm
);
515 static unsigned getBranchHint(unsigned PCC
, FunctionLoweringInfo
*FuncInfo
,
516 const SDValue
&DestMBB
) {
517 assert(isa
<BasicBlockSDNode
>(DestMBB
));
519 if (!FuncInfo
->BPI
) return PPC::BR_NO_HINT
;
521 const BasicBlock
*BB
= FuncInfo
->MBB
->getBasicBlock();
522 const Instruction
*BBTerm
= BB
->getTerminator();
524 if (BBTerm
->getNumSuccessors() != 2) return PPC::BR_NO_HINT
;
526 const BasicBlock
*TBB
= BBTerm
->getSuccessor(0);
527 const BasicBlock
*FBB
= BBTerm
->getSuccessor(1);
529 auto TProb
= FuncInfo
->BPI
->getEdgeProbability(BB
, TBB
);
530 auto FProb
= FuncInfo
->BPI
->getEdgeProbability(BB
, FBB
);
532 // We only want to handle cases which are easy to predict at static time, e.g.
533 // C++ throw statement, that is very likely not taken, or calling never
534 // returned function, e.g. stdlib exit(). So we set Threshold to filter
537 // Below is LLVM branch weight table, we only want to handle case 1, 2
539 // Case Taken:Nontaken Example
540 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
541 // 2. Invoke-terminating 1:1048575
542 // 3. Coldblock 4:64 __builtin_expect
543 // 4. Loop Branch 124:4 For loop
544 // 5. PH/ZH/FPH 20:12
545 const uint32_t Threshold
= 10000;
547 if (std::max(TProb
, FProb
) / Threshold
< std::min(TProb
, FProb
))
548 return PPC::BR_NO_HINT
;
550 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo
->Fn
->getName()
551 << "::" << BB
->getName() << "'\n"
552 << " -> " << TBB
->getName() << ": " << TProb
<< "\n"
553 << " -> " << FBB
->getName() << ": " << FProb
<< "\n");
555 const BasicBlockSDNode
*BBDN
= cast
<BasicBlockSDNode
>(DestMBB
);
557 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
558 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
559 if (BBDN
->getBasicBlock()->getBasicBlock() != TBB
)
560 std::swap(TProb
, FProb
);
562 return (TProb
> FProb
) ? PPC::BR_TAKEN_HINT
: PPC::BR_NONTAKEN_HINT
;
565 // isOpcWithIntImmediate - This method tests to see if the node is a specific
566 // opcode and that it has a immediate integer right operand.
567 // If so Imm will receive the 32 bit value.
568 static bool isOpcWithIntImmediate(SDNode
*N
, unsigned Opc
, unsigned& Imm
) {
569 return N
->getOpcode() == Opc
570 && isInt32Immediate(N
->getOperand(1).getNode(), Imm
);
573 void PPCDAGToDAGISel::selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
) {
575 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
576 SDValue TFI
= CurDAG
->getTargetFrameIndex(FI
, N
->getValueType(0));
577 unsigned Opc
= N
->getValueType(0) == MVT::i32
? PPC::ADDI
: PPC::ADDI8
;
579 CurDAG
->SelectNodeTo(SN
, Opc
, N
->getValueType(0), TFI
,
580 getSmallIPtrImm(Offset
, dl
));
582 ReplaceNode(SN
, CurDAG
->getMachineNode(Opc
, dl
, N
->getValueType(0), TFI
,
583 getSmallIPtrImm(Offset
, dl
)));
586 bool PPCDAGToDAGISel::isRotateAndMask(SDNode
*N
, unsigned Mask
,
587 bool isShiftMask
, unsigned &SH
,
588 unsigned &MB
, unsigned &ME
) {
589 // Don't even go down this path for i64, since different logic will be
590 // necessary for rldicl/rldicr/rldimi.
591 if (N
->getValueType(0) != MVT::i32
)
595 unsigned Indeterminant
= ~0; // bit mask marking indeterminant results
596 unsigned Opcode
= N
->getOpcode();
597 if (N
->getNumOperands() != 2 ||
598 !isInt32Immediate(N
->getOperand(1).getNode(), Shift
) || (Shift
> 31))
601 if (Opcode
== ISD::SHL
) {
602 // apply shift left to mask if it comes first
603 if (isShiftMask
) Mask
= Mask
<< Shift
;
604 // determine which bits are made indeterminant by shift
605 Indeterminant
= ~(0xFFFFFFFFu
<< Shift
);
606 } else if (Opcode
== ISD::SRL
) {
607 // apply shift right to mask if it comes first
608 if (isShiftMask
) Mask
= Mask
>> Shift
;
609 // determine which bits are made indeterminant by shift
610 Indeterminant
= ~(0xFFFFFFFFu
>> Shift
);
611 // adjust for the left rotate
613 } else if (Opcode
== ISD::ROTL
) {
619 // if the mask doesn't intersect any Indeterminant bits
620 if (Mask
&& !(Mask
& Indeterminant
)) {
622 // make sure the mask is still a mask (wrap arounds may not be)
623 return isRunOfOnes(Mask
, MB
, ME
);
628 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode
*ST
) {
629 SDValue Base
= ST
->getBasePtr();
630 if (Base
.getOpcode() != PPCISD::ADD_TLS
)
632 SDValue Offset
= ST
->getOffset();
633 if (!Offset
.isUndef())
637 EVT MemVT
= ST
->getMemoryVT();
638 EVT RegVT
= ST
->getValue().getValueType();
641 switch (MemVT
.getSimpleVT().SimpleTy
) {
645 Opcode
= (RegVT
== MVT::i32
) ? PPC::STBXTLS_32
: PPC::STBXTLS
;
649 Opcode
= (RegVT
== MVT::i32
) ? PPC::STHXTLS_32
: PPC::STHXTLS
;
653 Opcode
= (RegVT
== MVT::i32
) ? PPC::STWXTLS_32
: PPC::STWXTLS
;
657 Opcode
= PPC::STDXTLS
;
661 SDValue Chain
= ST
->getChain();
662 SDVTList VTs
= ST
->getVTList();
663 SDValue Ops
[] = {ST
->getValue(), Base
.getOperand(0), Base
.getOperand(1),
665 SDNode
*MN
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
666 transferMemOperands(ST
, MN
);
671 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode
*LD
) {
672 SDValue Base
= LD
->getBasePtr();
673 if (Base
.getOpcode() != PPCISD::ADD_TLS
)
675 SDValue Offset
= LD
->getOffset();
676 if (!Offset
.isUndef())
680 EVT MemVT
= LD
->getMemoryVT();
681 EVT RegVT
= LD
->getValueType(0);
683 switch (MemVT
.getSimpleVT().SimpleTy
) {
687 Opcode
= (RegVT
== MVT::i32
) ? PPC::LBZXTLS_32
: PPC::LBZXTLS
;
691 Opcode
= (RegVT
== MVT::i32
) ? PPC::LHZXTLS_32
: PPC::LHZXTLS
;
695 Opcode
= (RegVT
== MVT::i32
) ? PPC::LWZXTLS_32
: PPC::LWZXTLS
;
699 Opcode
= PPC::LDXTLS
;
703 SDValue Chain
= LD
->getChain();
704 SDVTList VTs
= LD
->getVTList();
705 SDValue Ops
[] = {Base
.getOperand(0), Base
.getOperand(1), Chain
};
706 SDNode
*MN
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
707 transferMemOperands(LD
, MN
);
712 /// Turn an or of two masked values into the rotate left word immediate then
713 /// mask insert (rlwimi) instruction.
714 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode
*N
) {
715 SDValue Op0
= N
->getOperand(0);
716 SDValue Op1
= N
->getOperand(1);
719 KnownBits LKnown
= CurDAG
->computeKnownBits(Op0
);
720 KnownBits RKnown
= CurDAG
->computeKnownBits(Op1
);
722 unsigned TargetMask
= LKnown
.Zero
.getZExtValue();
723 unsigned InsertMask
= RKnown
.Zero
.getZExtValue();
725 if ((TargetMask
| InsertMask
) == 0xFFFFFFFF) {
726 unsigned Op0Opc
= Op0
.getOpcode();
727 unsigned Op1Opc
= Op1
.getOpcode();
728 unsigned Value
, SH
= 0;
729 TargetMask
= ~TargetMask
;
730 InsertMask
= ~InsertMask
;
732 // If the LHS has a foldable shift and the RHS does not, then swap it to the
733 // RHS so that we can fold the shift into the insert.
734 if (Op0Opc
== ISD::AND
&& Op1Opc
== ISD::AND
) {
735 if (Op0
.getOperand(0).getOpcode() == ISD::SHL
||
736 Op0
.getOperand(0).getOpcode() == ISD::SRL
) {
737 if (Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
738 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
740 std::swap(Op0Opc
, Op1Opc
);
741 std::swap(TargetMask
, InsertMask
);
744 } else if (Op0Opc
== ISD::SHL
|| Op0Opc
== ISD::SRL
) {
745 if (Op1Opc
== ISD::AND
&& Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
746 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
748 std::swap(Op0Opc
, Op1Opc
);
749 std::swap(TargetMask
, InsertMask
);
754 if (isRunOfOnes(InsertMask
, MB
, ME
)) {
755 if ((Op1Opc
== ISD::SHL
|| Op1Opc
== ISD::SRL
) &&
756 isInt32Immediate(Op1
.getOperand(1), Value
)) {
757 Op1
= Op1
.getOperand(0);
758 SH
= (Op1Opc
== ISD::SHL
) ? Value
: 32 - Value
;
760 if (Op1Opc
== ISD::AND
) {
761 // The AND mask might not be a constant, and we need to make sure that
762 // if we're going to fold the masking with the insert, all bits not
763 // known to be zero in the mask are known to be one.
764 KnownBits MKnown
= CurDAG
->computeKnownBits(Op1
.getOperand(1));
765 bool CanFoldMask
= InsertMask
== MKnown
.One
.getZExtValue();
767 unsigned SHOpc
= Op1
.getOperand(0).getOpcode();
768 if ((SHOpc
== ISD::SHL
|| SHOpc
== ISD::SRL
) && CanFoldMask
&&
769 isInt32Immediate(Op1
.getOperand(0).getOperand(1), Value
)) {
770 // Note that Value must be in range here (less than 32) because
771 // otherwise there would not be any bits set in InsertMask.
772 Op1
= Op1
.getOperand(0).getOperand(0);
773 SH
= (SHOpc
== ISD::SHL
) ? Value
: 32 - Value
;
778 SDValue Ops
[] = { Op0
, Op1
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
780 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
787 // Predict the number of instructions that would be generated by calling
789 static unsigned selectI64ImmInstrCountDirect(int64_t Imm
) {
790 // Assume no remaining bits.
791 unsigned Remainder
= 0;
792 // Assume no shift required.
795 // If it can't be represented as a 32 bit value.
796 if (!isInt
<32>(Imm
)) {
797 Shift
= countTrailingZeros
<uint64_t>(Imm
);
798 int64_t ImmSh
= static_cast<uint64_t>(Imm
) >> Shift
;
800 // If the shifted value fits 32 bits.
801 if (isInt
<32>(ImmSh
)) {
802 // Go with the shifted value.
805 // Still stuck with a 64 bit value.
812 // Intermediate operand.
815 // Handle first 32 bits.
816 unsigned Lo
= Imm
& 0xFFFF;
819 if (isInt
<16>(Imm
)) {
823 // Handle the Hi bits and Lo bits.
830 // If no shift, we're done.
831 if (!Shift
) return Result
;
833 // If Hi word == Lo word,
834 // we can use rldimi to insert the Lo word into Hi word.
835 if ((unsigned)(Imm
& 0xFFFFFFFF) == Remainder
) {
840 // Shift for next step if the upper 32-bits were not zero.
844 // Add in the last bits as required.
845 if ((Remainder
>> 16) & 0xFFFF)
847 if (Remainder
& 0xFFFF)
/// Rot64 - Rotate the 64-bit value \p Imm left by \p R bit positions.
///
/// The rotate count is reduced modulo 64, so a count of 0 (or any multiple
/// of 64) returns \p Imm unchanged. Without this guard the expression below
/// would shift a 64-bit value by 64 bits, which is undefined behaviour.
///
/// \param Imm value to rotate.
/// \param R   number of bit positions to rotate left by.
/// \returns \p Imm rotated left by (R mod 64) bits.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
857 static unsigned selectI64ImmInstrCount(int64_t Imm
) {
858 unsigned Count
= selectI64ImmInstrCountDirect(Imm
);
860 // If the instruction count is 1 or 2, we do not need further analysis
861 // since rotate + load constant requires at least 2 instructions.
865 for (unsigned r
= 1; r
< 63; ++r
) {
866 uint64_t RImm
= Rot64(Imm
, r
);
867 unsigned RCount
= selectI64ImmInstrCountDirect(RImm
) + 1;
868 Count
= std::min(Count
, RCount
);
870 // See comments in selectI64Imm for an explanation of the logic below.
871 unsigned LS
= findLastSet(RImm
);
875 uint64_t OnesMask
= -(int64_t) (UINT64_C(1) << (LS
+1));
876 uint64_t RImmWithOnes
= RImm
| OnesMask
;
878 RCount
= selectI64ImmInstrCountDirect(RImmWithOnes
) + 1;
879 Count
= std::min(Count
, RCount
);
885 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
886 // (above) needs to be kept in sync with this function.
// Builds the machine nodes for the immediate through CurDAG at location dl
// and returns the node that produces the final value.
887 static SDNode
*selectI64ImmDirect(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
889 // Assume no remaining bits.
890 unsigned Remainder
= 0;
891 // Assume no shift required.
894 // If it can't be represented as a 32 bit value.
895 if (!isInt
<32>(Imm
)) {
// Strip the trailing zero bits and check whether what is left fits in 32
// bits; the shift is done on the unsigned value.
896 Shift
= countTrailingZeros
<uint64_t>(Imm
);
897 int64_t ImmSh
= static_cast<uint64_t>(Imm
) >> Shift
;
899 // If the shifted value fits 32 bits.
900 if (isInt
<32>(ImmSh
)) {
901 // Go with the shifted value.
904 // Still stuck with a 64 bit value.
911 // Intermediate operand.
914 // Handle first 32 bits.
// Lo is bits [15:0] of Imm and Hi is bits [31:16].
915 unsigned Lo
= Imm
& 0xFFFF;
916 unsigned Hi
= (Imm
>> 16) & 0xFFFF;
// Helper that wraps a value as an i32 target constant at location dl.
918 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
919 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
// Fits a signed 16-bit immediate: a single LI8 of the low half, sign-extended
// from 16 bits and passed as an i64 target constant.
923 if (isInt
<16>(Imm
)) {
924 uint64_t SextImm
= SignExtend64(Lo
, 16);
925 SDValue SDImm
= CurDAG
->getTargetConstant(SextImm
, dl
, MVT::i64
);
927 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, SDImm
);
929 // Handle the Hi bits.
// LIS8 when Hi is non-zero, LI8 otherwise; the low half is then ORed in.
930 unsigned OpC
= Hi
? PPC::LIS8
: PPC::LI8
;
931 Result
= CurDAG
->getMachineNode(OpC
, dl
, MVT::i64
, getI32Imm(Hi
));
933 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
934 SDValue(Result
, 0), getI32Imm(Lo
));
937 Result
= CurDAG
->getMachineNode(PPC::LIS8
, dl
, MVT::i64
, getI32Imm(Hi
));
940 // If no shift, we're done.
941 if (!Shift
) return Result
;
943 // If Hi word == Lo word,
944 // we can use rldimi to insert the Lo word into Hi word.
945 if ((unsigned)(Imm
& 0xFFFFFFFF) == Remainder
) {
// Result is passed as both the first and the second operand of RLDIMI.
947 { SDValue(Result
, 0), SDValue(Result
, 0), getI32Imm(Shift
), getI32Imm(0)};
948 return CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
);
951 // Shift for next step if the upper 32-bits were not zero.
953 Result
= CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
,
956 getI32Imm(63 - Shift
));
959 // Add in the last bits as required.
// Hi and Lo are reused for the two 16-bit halves of Remainder; the
// assignments inside the conditions are intentional.
960 if ((Hi
= (Remainder
>> 16) & 0xFFFF)) {
961 Result
= CurDAG
->getMachineNode(PPC::ORIS8
, dl
, MVT::i64
,
962 SDValue(Result
, 0), getI32Imm(Hi
));
964 if ((Lo
= Remainder
& 0xFFFF)) {
965 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
966 SDValue(Result
, 0), getI32Imm(Lo
));
// Materializes a 64-bit immediate at location dl. The direct instruction
// count decides whether the plain selectI64ImmDirect sequence is used;
// otherwise rotation amounts r in [1, 63) are searched for a value that is
// cheaper to build, and that value is then moved into place with one RLDICR.
972 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
974 unsigned Count
= selectI64ImmInstrCountDirect(Imm
);
976 // If the instruction count is 1 or 2, we do not need further analysis
977 // since rotate + load constant requires at least 2 instructions.
979 return selectI64ImmDirect(CurDAG
, dl
, Imm
);
986 for (unsigned r
= 1; r
< 63; ++r
) {
987 uint64_t RImm
= Rot64(Imm
, r
);
// The "+ 1" charges for the rotate needed after loading RImm.
988 unsigned RCount
= selectI64ImmInstrCountDirect(RImm
) + 1;
989 if (RCount
< Count
) {
996 // If the immediate to generate has many trailing zeros, it might be
997 // worthwhile to generate a rotated value with too many leading ones
998 // (because that's free with li/lis's sign-extension semantics), and then
999 // mask them off after rotation.
1001 unsigned LS
= findLastSet(RImm
);
1002 // We're adding (63-LS) higher-order ones, and we expect to mask them off
1003 // after performing the inverse rotation by (64-r). So we need that:
1004 // 63-LS == 64-r => LS == r-1
// OnesMask has every bit above position LS set.
1008 uint64_t OnesMask
= -(int64_t) (UINT64_C(1) << (LS
+1));
1009 uint64_t RImmWithOnes
= RImm
| OnesMask
;
1011 RCount
= selectI64ImmInstrCountDirect(RImmWithOnes
) + 1;
1012 if (RCount
< Count
) {
1015 MatImm
= RImmWithOnes
;
1021 return selectI64ImmDirect(CurDAG
, dl
, Imm
);
// Helper that wraps a value as an i32 target constant at location dl.
1023 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
1024 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
// Build the chosen value MatImm directly, then apply RLDICR with rotation
// amount (64 - RMin) and mask end MaskEnd.
1027 SDValue Val
= SDValue(selectI64ImmDirect(CurDAG
, dl
, MatImm
), 0);
1028 return CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Val
,
1029 getI32Imm(64 - RMin
), getI32Imm(MaskEnd
));
// Walks every use of N and accumulates in MaxTruncation the widest bit width
// recorded for any use: the use's value-type size, a store's memory-VT size,
// or a fixed 32, 16 or 8. Returns that maximum. The caller in this file
// treats a zero result as "no truncation applies".
1032 static unsigned allUsesTruncate(SelectionDAG
*CurDAG
, SDNode
*N
) {
1033 unsigned MaxTruncation
= 0;
1034 // Cannot use range-based for loop here as we need the actual use (i.e. we
1035 // need the operand number corresponding to the use). A range-based for
1036 // will unbox the use and provide an SDNode*.
1037 for (SDNode::use_iterator Use
= N
->use_begin(), UseEnd
= N
->use_end();
1038 Use
!= UseEnd
; ++Use
) {
// Machine nodes and generic nodes report their opcode through different
// accessors.
1040 Use
->isMachineOpcode() ? Use
->getMachineOpcode() : Use
->getOpcode();
1044 if (Use
->isMachineOpcode())
1047 std::max(MaxTruncation
, Use
->getValueType(0).getSizeInBits());
1050 if (Use
->isMachineOpcode())
// For a store use, a 64-bit memory type or a use of N as anything other than
// operand 0 is singled out by the check below.
1052 StoreSDNode
*STN
= cast
<StoreSDNode
>(*Use
);
1053 unsigned MemVTSize
= STN
->getMemoryVT().getSizeInBits();
1054 if (MemVTSize
== 64 || Use
.getOperandNo() != 0)
1056 MaxTruncation
= std::max(MaxTruncation
, MemVTSize
);
// The three groups below apply the same operand-0 check and record fixed
// widths of 32, 16 and 8 bits respectively.
1063 if (Use
.getOperandNo() != 0)
1065 MaxTruncation
= std::max(MaxTruncation
, 32u);
1071 if (Use
.getOperandNo() != 0)
1073 MaxTruncation
= std::max(MaxTruncation
, 16u);
1079 if (Use
.getOperandNo() != 0)
1081 MaxTruncation
= std::max(MaxTruncation
, 8u);
1085 return MaxTruncation
;
1088 // Select a 64-bit constant.
// Overload taking the constant node N. When allUsesTruncate reports a
// non-zero width, the value is sign-extended from that width and a single
// LI8 is emitted if the result fits a signed 16-bit immediate; the final
// return defers to the (CurDAG, dl, Imm) overload.
1089 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, SDNode
*N
) {
1092 // Get 64 bit value.
1093 int64_t Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
1094 if (unsigned MinSize
= allUsesTruncate(CurDAG
, N
)) {
1095 uint64_t SextImm
= SignExtend64(Imm
, MinSize
);
1096 SDValue SDImm
= CurDAG
->getTargetConstant(SextImm
, dl
, MVT::i64
);
1097 if (isInt
<16>(SextImm
))
1098 return CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, SDImm
);
1100 return selectI64Imm(CurDAG
, dl
, Imm
);
1105 class BitPermutationSelector
{
1109 // The bit number in the value, using a convention where bit 0 is the
1110 // lowest-order bit.
1113 // ConstZero means a bit we need to mask off.
1114 // Variable is a bit that comes from an input variable.
1115 // VariableKnownToBeZero is a bit that also comes from an input variable,
1116 // but it is known to be already zero. So we do not need to mask it.
1120 VariableKnownToBeZero
// Bit I of value V, of kind K (Variable unless stated otherwise).
1123 ValueBit(SDValue V
, unsigned I
, Kind K
= Variable
)
1124 : V(V
), Idx(I
), K(K
) {}
// A bit with no underlying value: V is a null SDValue and Idx is UINT32_MAX.
1125 ValueBit(Kind K
= Variable
)
1126 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX
), K(K
) {}
// True for both flavours of zero: ConstZero and VariableKnownToBeZero.
1128 bool isZero() const {
1129 return K
== ConstZero
|| K
== VariableKnownToBeZero
;
// True when the bit comes from an input value (Variable or
// VariableKnownToBeZero), i.e. for every kind except ConstZero.
1132 bool hasValue() const {
1133 return K
== Variable
|| K
== VariableKnownToBeZero
;
// Accessor for the underlying value; asserts hasValue().
1136 SDValue
getValue() const {
1137 assert(hasValue() && "Cannot get the value of a constant bit");
// Accessor for the bit index within the underlying value; asserts hasValue().
1141 unsigned getValueBitIndex() const {
1142 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1147 // A bit group has the same underlying value and the same rotate factor.
// StartIdx and EndIdx delimit the group; both are treated as inclusive by
// the loops in this class that walk a group.
1151 unsigned StartIdx
, EndIdx
;
1153 // This rotation amount assumes that the lower 32 bits of the quantity are
1154 // replicated in the high 32 bits by the rotation operator (which is done
1155 // by rlwinm and friends in 64-bit mode).
1157 // Did converting to Repl32 == true change the rotation factor? If it did,
1158 // it decreased it by 32.
1160 // Was this group coalesced after setting Repl32 to true?
1161 bool Repl32Coalesced
;
// Creates a group for value V with rotation R covering [S, E]. All three
// Repl32-related flags start out false; the new group is reported on the
// debug stream.
1163 BitGroup(SDValue V
, unsigned R
, unsigned S
, unsigned E
)
1164 : V(V
), RLAmt(R
), StartIdx(S
), EndIdx(E
), Repl32(false), Repl32CR(false),
1165 Repl32Coalesced(false) {
1166 LLVM_DEBUG(dbgs() << "\tbit group for " << V
.getNode() << " RLAmt = " << R
1167 << " [" << S
<< ", " << E
<< "]\n");
1171 // Information on each (Value, RLAmt) pair (like the number of groups
1172 // associated with each) used to choose the lowering method.
1173 struct ValueRotInfo
{
// Defaults: RLAmt and FirstGroupStartIdx start at the maximum unsigned value,
// NumGroups at zero, and Repl32 at false.
1175 unsigned RLAmt
= std::numeric_limits
<unsigned>::max();
1176 unsigned NumGroups
= 0;
1177 unsigned FirstGroupStartIdx
= std::numeric_limits
<unsigned>::max();
1178 bool Repl32
= false;
1180 ValueRotInfo() = default;
1182 // For sorting (in reverse order) by NumGroups, and then by
1183 // FirstGroupStartIdx.
// The comparison keys are consulted in this priority order: Repl32,
// NumGroups, whether RLAmt is zero, and finally FirstGroupStartIdx.
1184 bool operator < (const ValueRotInfo
&Other
) const {
1185 // We need to sort so that the non-Repl32 come first because, when we're
1186 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1187 // masking operation.
1188 if (Repl32
< Other
.Repl32
)
1190 else if (Repl32
> Other
.Repl32
)
1192 else if (NumGroups
> Other
.NumGroups
)
1194 else if (NumGroups
< Other
.NumGroups
)
1196 else if (RLAmt
== 0 && Other
.RLAmt
!= 0)
1198 else if (RLAmt
!= 0 && Other
.RLAmt
== 0)
1200 else if (FirstGroupStartIdx
< Other
.FirstGroupStartIdx
)
// A memoized result: the "interesting" flag paired with the per-bit
// description of a value.
1206 using ValueBitsMemoizedValue
= std::pair
<bool, SmallVector
<ValueBit
, 64>>;
// Cache keyed by SDValue. Each entry is held through a unique_ptr, and
// getValueBits hands out pointers into the entry.
1207 using ValueBitsMemoizer
=
1208 DenseMap
<SDValue
, std::unique_ptr
<ValueBitsMemoizedValue
>>;
1209 ValueBitsMemoizer Memoizer
;
1211 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1212 // The bool is true if something interesting was deduced, otherwise if we're
1213 // providing only a generic representation of V (or something else likewise
1214 // uninteresting for instruction selection) through the SmallVector.
// Results are cached in Memoizer, keyed by V. Operands are analysed through
// recursive calls to this function.
1215 std::pair
<bool, SmallVector
<ValueBit
, 64> *> getValueBits(SDValue V
,
1217 auto &ValueEntry
= Memoizer
[V
];
1219 return std::make_pair(ValueEntry
->first
, &ValueEntry
->second
);
// No usable cached result: create a fresh entry and size it to NumBits.
// Interesting and Bits alias the entry's two members, so writing to them
// fills the cache in place.
1220 ValueEntry
.reset(new ValueBitsMemoizedValue());
1221 bool &Interesting
= ValueEntry
->first
;
1222 SmallVector
<ValueBit
, 64> &Bits
= ValueEntry
->second
;
1223 Bits
.resize(NumBits
);
1225 switch (V
.getOpcode()) {
// Rotation by a constant amount: result bit i takes operand bit
// (i - RotAmt), wrapping around modulo NumBits.
1228 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1229 unsigned RotAmt
= V
.getConstantOperandVal(1);
1231 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1233 for (unsigned i
= 0; i
< NumBits
; ++i
)
1234 Bits
[i
] = LHSBits
[i
< RotAmt
? i
+ (NumBits
- RotAmt
) : i
- RotAmt
];
1236 return std::make_pair(Interesting
= true, &Bits
);
// Shift by a constant amount towards the high-order end: result bit i takes
// operand bit (i - ShiftAmt) and the low ShiftAmt bits become ConstZero.
1240 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1241 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1243 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1245 for (unsigned i
= ShiftAmt
; i
< NumBits
; ++i
)
1246 Bits
[i
] = LHSBits
[i
- ShiftAmt
];
1248 for (unsigned i
= 0; i
< ShiftAmt
; ++i
)
1249 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1251 return std::make_pair(Interesting
= true, &Bits
);
// Shift by a constant amount towards the low-order end: result bit i takes
// operand bit (i + ShiftAmt) and the high ShiftAmt bits become ConstZero.
1255 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1256 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1258 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1260 for (unsigned i
= 0; i
< NumBits
- ShiftAmt
; ++i
)
1261 Bits
[i
] = LHSBits
[i
+ ShiftAmt
];
1263 for (unsigned i
= NumBits
- ShiftAmt
; i
< NumBits
; ++i
)
1264 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1266 return std::make_pair(Interesting
= true, &Bits
);
// Masking with a constant: bits selected by Mask pass through from the
// operand; the others are handled as described in the comment below.
1270 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1271 uint64_t Mask
= V
.getConstantOperandVal(1);
1273 const SmallVector
<ValueBit
, 64> *LHSBits
;
1274 // Mark this as interesting, only if the LHS was also interesting. This
1275 // prevents the overall procedure from matching a single immediate 'and'
1276 // (which is non-optimal because such an and might be folded with other
1277 // things if we don't select it here).
1278 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0), NumBits
);
1280 for (unsigned i
= 0; i
< NumBits
; ++i
)
1281 if (((Mask
>> i
) & 1) == 1)
1282 Bits
[i
] = (*LHSBits
)[i
];
1284 // AND instruction masks this bit. If the input is already zero,
1285 // we have nothing to do here. Otherwise, make the bit ConstZero.
1286 if ((*LHSBits
)[i
].isZero())
1287 Bits
[i
] = (*LHSBits
)[i
];
1289 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1292 return std::make_pair(Interesting
, &Bits
);
// Two-operand combination: a bit position can be described when at least one
// side is zero there (the result is then the other side's bit). A position
// where neither side is zero clears AllDisjoint. LastVal and LastIdx track
// the previous bit so runs of the same value are preferred.
1296 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1297 const auto &RHSBits
= *getValueBits(V
.getOperand(1), NumBits
).second
;
1299 bool AllDisjoint
= true;
1300 SDValue LastVal
= SDValue();
1301 unsigned LastIdx
= 0;
1302 for (unsigned i
= 0; i
< NumBits
; ++i
) {
1303 if (LHSBits
[i
].isZero() && RHSBits
[i
].isZero()) {
1304 // If both inputs are known to be zero and one is ConstZero and
1305 // another is VariableKnownToBeZero, we can select whichever
1306 // we like. To minimize the number of bit groups, we select
1307 // VariableKnownToBeZero if this bit is the next bit of the same
1308 // input variable from the previous bit. Otherwise, we select
// ConstZero.
1310 if (LHSBits
[i
].hasValue() && LHSBits
[i
].getValue() == LastVal
&&
1311 LHSBits
[i
].getValueBitIndex() == LastIdx
+ 1)
1312 Bits
[i
] = LHSBits
[i
];
1313 else if (RHSBits
[i
].hasValue() && RHSBits
[i
].getValue() == LastVal
&&
1314 RHSBits
[i
].getValueBitIndex() == LastIdx
+ 1)
1315 Bits
[i
] = RHSBits
[i
];
1317 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1319 else if (LHSBits
[i
].isZero())
1320 Bits
[i
] = RHSBits
[i
];
1321 else if (RHSBits
[i
].isZero())
1322 Bits
[i
] = LHSBits
[i
];
1324 AllDisjoint
= false;
1327 // We remember the value and bit index of this bit.
1328 if (Bits
[i
].hasValue()) {
1329 LastVal
= Bits
[i
].getValue();
1330 LastIdx
= Bits
[i
].getValueBitIndex();
// Forget the remembered value when one is currently set.
1333 if (LastVal
) LastVal
= SDValue();
1341 return std::make_pair(Interesting
= true, &Bits
);
1343 case ISD::ZERO_EXTEND
: {
1344 // We support only the case with zero extension from i32 to i64 so far.
1345 if (V
.getValueType() != MVT::i64
||
1346 V
.getOperand(0).getValueType() != MVT::i32
)
1349 const SmallVector
<ValueBit
, 64> *LHSBits
;
1350 const unsigned NumOperandBits
= 32;
1351 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0),
// The low 32 bits come from the operand; the extended bits are ConstZero.
1354 for (unsigned i
= 0; i
< NumOperandBits
; ++i
)
1355 Bits
[i
] = (*LHSBits
)[i
];
1357 for (unsigned i
= NumOperandBits
; i
< NumBits
; ++i
)
1358 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1360 return std::make_pair(Interesting
, &Bits
);
1362 case ISD::TRUNCATE
: {
1363 EVT FromType
= V
.getOperand(0).getValueType();
1364 EVT ToType
= V
.getValueType();
1365 // We support only the case with truncate from i64 to i32.
1366 if (FromType
!= MVT::i64
|| ToType
!= MVT::i32
)
1368 const unsigned NumAllBits
= FromType
.getSizeInBits();
1369 SmallVector
<ValueBit
, 64> *InBits
;
1370 std::tie(Interesting
, InBits
) = getValueBits(V
.getOperand(0),
1372 const unsigned NumValidBits
= ToType
.getSizeInBits();
1374 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1375 // So, we cannot include this truncate.
1376 bool UseUpper32bit
= false;
1377 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1378 if ((*InBits
)[i
].hasValue() && (*InBits
)[i
].getValueBitIndex() >= 32) {
1379 UseUpper32bit
= true;
// Copy the surviving low bits of the operand through unchanged.
1385 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1386 Bits
[i
] = (*InBits
)[i
];
1388 return std::make_pair(Interesting
, &Bits
);
1390 case ISD::AssertZext
: {
1391 // For AssertZext, we look through the operand and
1392 // mark the bits known to be zero.
1393 const SmallVector
<ValueBit
, 64> *LHSBits
;
1394 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0),
// Operand 1 carries the asserted type; bits below its width pass through.
1397 EVT FromType
= cast
<VTSDNode
>(V
.getOperand(1))->getVT();
1398 const unsigned NumValidBits
= FromType
.getSizeInBits();
1399 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1400 Bits
[i
] = (*LHSBits
)[i
];
1402 // These bits are known to be zero.
// They keep the operand's value and bit index but are re-tagged as
// VariableKnownToBeZero.
1403 for (unsigned i
= NumValidBits
; i
< NumBits
; ++i
)
1404 Bits
[i
] = ValueBit((*LHSBits
)[i
].getValue(),
1405 (*LHSBits
)[i
].getValueBitIndex(),
1406 ValueBit::VariableKnownToBeZero
);
1408 return std::make_pair(Interesting
, &Bits
);
// Load: only a zero-extending load whose result 0 is being used gets special
// treatment.
1411 LoadSDNode
*LD
= cast
<LoadSDNode
>(V
);
1412 if (ISD::isZEXTLoad(V
.getNode()) && V
.getResNo() == 0) {
1413 EVT VT
= LD
->getMemoryVT();
1414 const unsigned NumValidBits
= VT
.getSizeInBits();
1416 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1417 Bits
[i
] = ValueBit(V
, i
);
1419 // These bits are known to be zero.
1420 for (unsigned i
= NumValidBits
; i
< NumBits
; ++i
)
1421 Bits
[i
] = ValueBit(V
, i
, ValueBit::VariableKnownToBeZero
);
1423 // Zero-extending load itself cannot be optimized. So, it is not
1424 // interesting by itself though it gives useful information.
1425 return std::make_pair(Interesting
= false, &Bits
);
// Generic representation: bit i of the result is simply bit i of V itself,
// and nothing interesting was deduced.
1430 for (unsigned i
= 0; i
< NumBits
; ++i
)
1431 Bits
[i
] = ValueBit(V
, i
);
1433 return std::make_pair(Interesting
= false, &Bits
);
1436 // For each value (except the constant ones), compute the left-rotate amount
1437 // to get it from its original to final position.
// Fills the RLAmt array, which is resized to one entry per element of Bits.
1438 void computeRotationAmounts() {
1440 RLAmt
.resize(Bits
.size());
1441 for (unsigned i
= 0; i
< Bits
.size(); ++i
)
1442 if (Bits
[i
].hasValue()) {
1443 unsigned VBI
= Bits
[i
].getValueBitIndex();
// Equals (i - VBI) with Bits.size() added, which keeps the unsigned result
// from wrapping when VBI is larger than i.
1447 RLAmt
[i
] = Bits
.size() - (VBI
- i
);
1448 } else if (Bits
[i
].isZero()) {
// A zero bit without an underlying value gets the UINT32_MAX sentinel.
1450 RLAmt
[i
] = UINT32_MAX
;
1452 llvm_unreachable("Unknown value bit type");
1456 // Collect groups of consecutive bits with the same underlying value and
1457 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1458 // they break up groups.
// Scans Bits from index 1 upward while tracking the value, rotation amount
// and start index of the run in progress; finished runs that have an
// underlying value are appended to BitGroups.
1459 void collectBitGroups(bool LateMask
) {
1462 unsigned LastRLAmt
= RLAmt
[0];
1463 SDValue LastValue
= Bits
[0].hasValue() ? Bits
[0].getValue() : SDValue();
1464 unsigned LastGroupStartIdx
= 0;
1465 bool IsGroupOfZeros
= !Bits
[LastGroupStartIdx
].hasValue();
1466 for (unsigned i
= 1; i
< Bits
.size(); ++i
) {
1467 unsigned ThisRLAmt
= RLAmt
[i
];
1468 SDValue ThisValue
= Bits
[i
].hasValue() ? Bits
[i
].getValue() : SDValue();
// With late masking, a bit without a value inherits the previous bit's value
// and rotation so that it does not split the run.
1469 if (LateMask
&& !ThisValue
) {
1470 ThisValue
= LastValue
;
1471 ThisRLAmt
= LastRLAmt
;
1472 // If we're doing late masking, then the first bit group always starts
1473 // at zero (even if the first bits were zero).
1474 if (BitGroups
.empty())
1475 LastGroupStartIdx
= 0;
1478 // If this bit is known to be zero and the current group is a bit group
1479 // of zeros, we do not need to terminate the current bit group even the
1480 // Value or RLAmt does not match here. Instead, we terminate this group
1481 // when the first non-zero bit appears later.
1482 if (IsGroupOfZeros
&& Bits
[i
].isZero())
1485 // If this bit has the same underlying value and the same rotate factor as
1486 // the last one, then they're part of the same group.
1487 if (ThisRLAmt
== LastRLAmt
&& ThisValue
== LastValue
)
1488 // We cannot continue the current group if this bits is not known to
1489 // be zero in a bit group of zeros.
1490 if (!(IsGroupOfZeros
&& ThisValue
&& !Bits
[i
].isZero()))
// The run ended: record it if it has an underlying value, then start a new
// run at bit i.
1493 if (LastValue
.getNode())
1494 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1496 LastRLAmt
= ThisRLAmt
;
1497 LastValue
= ThisValue
;
1498 LastGroupStartIdx
= i
;
1499 IsGroupOfZeros
= !Bits
[LastGroupStartIdx
].hasValue();
// Record the final run, which the loop above leaves open.
1501 if (LastValue
.getNode())
1502 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1505 if (BitGroups
.empty())
1508 // We might be able to combine the first and last groups.
1509 if (BitGroups
.size() > 1) {
1510 // If the first and last groups are the same, then remove the first group
1511 // in favor of the last group, making the ending index of the last group
1512 // equal to the ending index of the to-be-removed first group.
1513 if (BitGroups
[0].StartIdx
== 0 &&
1514 BitGroups
[BitGroups
.size()-1].EndIdx
== Bits
.size()-1 &&
1515 BitGroups
[0].V
== BitGroups
[BitGroups
.size()-1].V
&&
1516 BitGroups
[0].RLAmt
== BitGroups
[BitGroups
.size()-1].RLAmt
) {
1517 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1518 BitGroups
[BitGroups
.size()-1].EndIdx
= BitGroups
[0].EndIdx
;
1519 BitGroups
.erase(BitGroups
.begin());
1524 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1525 // associated with each. If the number of groups are same, we prefer a group
1526 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1527 // instruction. If there is a degeneracy, pick the one that occurs
1528 // first (in the final value).
// Aggregates BitGroups into ValueRots and then rebuilds ValueRotsVec as a
// sorted copy of the aggregated entries.
1529 void collectValueRotInfo() {
1532 for (auto &BG
: BitGroups
) {
// Repl32 groups get 64 added to the rotation part of the map key, so they
// are keyed separately from non-Repl32 groups with the same value and
// rotation amount.
1533 unsigned RLAmtKey
= BG
.RLAmt
+ (BG
.Repl32
? 64 : 0);
1534 ValueRotInfo
&VRI
= ValueRots
[std::make_pair(BG
.V
, RLAmtKey
)];
1536 VRI
.RLAmt
= BG
.RLAmt
;
1537 VRI
.Repl32
= BG
.Repl32
;
// Keep the smallest start index seen among the groups sharing this entry.
1539 VRI
.FirstGroupStartIdx
= std::min(VRI
.FirstGroupStartIdx
, BG
.StartIdx
);
1542 // Now that we've collected the various ValueRotInfo instances, we need to
// sort them.
1544 ValueRotsVec
.clear();
1545 for (auto &I
: ValueRots
) {
1546 ValueRotsVec
.push_back(I
.second
);
1548 llvm::sort(ValueRotsVec
);
1551 // In 64-bit mode, rlwinm and friends have a rotation operator that
1552 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1553 // indices of these instructions can only be in the lower 32 bits, so they
1554 // can only represent some 64-bit bit groups. However, when they can be used,
1555 // the 32-bit replication can be used to represent, as a single bit group,
1556 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1557 // groups when possible. Returns true if any of the bit groups were
// converted.
// NOTE(review): the sentence above mentions a return value, but the function
// is declared void.
1559 void assignRepl32BitGroups() {
1560 // If we have bits like this:
1562 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1563 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1564 // Groups: | RLAmt = 8 | RLAmt = 40 |
1566 // But, making use of a 32-bit operation that replicates the low-order 32
1567 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
// of 8.
// Helper: walks the bits covered by BG, testing whether each has a value and
// whether its value bit index is 32 or more. A group whose StartIdx is
// greater than its EndIdx wraps around and is scanned as two ranges:
// [StartIdx, Bits.size()) and [0, EndIdx].
1570 auto IsAllLow32
= [this](BitGroup
& BG
) {
1571 if (BG
.StartIdx
<= BG
.EndIdx
) {
1572 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
) {
1573 if (!Bits
[i
].hasValue())
1575 if (Bits
[i
].getValueBitIndex() >= 32)
1579 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
) {
1580 if (!Bits
[i
].hasValue())
1582 if (Bits
[i
].getValueBitIndex() >= 32)
1585 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
) {
1586 if (!Bits
[i
].hasValue())
1588 if (Bits
[i
].getValueBitIndex() >= 32)
1596 for (auto &BG
: BitGroups
) {
1597 // If this bit group has RLAmt of 0 and will not be merged with
1598 // another bit group, we don't benefit from Repl32. We don't mark
1599 // such group to give more freedom for later instruction selection.
1600 if (BG
.RLAmt
== 0) {
// Helper: looks for a different group with the same value whose rotation
// amount is 0 or 32.
1601 auto PotentiallyMerged
= [this](BitGroup
& BG
) {
1602 for (auto &BG2
: BitGroups
)
1603 if (&BG
!= &BG2
&& BG
.V
== BG2
.V
&&
1604 (BG2
.RLAmt
== 0 || BG2
.RLAmt
== 32))
1608 if (!PotentiallyMerged(BG
))
// Only groups that lie entirely within the low 32 result bits are candidates.
1611 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32) {
1612 if (IsAllLow32(BG
)) {
1613 if (BG
.RLAmt
>= 32) {
1620 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
1621 << BG
.V
.getNode() << " RLAmt = " << BG
.RLAmt
<< " ["
1622 << BG
.StartIdx
<< ", " << BG
.EndIdx
<< "]\n");
1627 // Now walk through the bit groups, consolidating where possible.
1628 for (auto I
= BitGroups
.begin(); I
!= BitGroups
.end();) {
1629 // We might want to remove this bit group by merging it with the previous
1630 // group (which might be the ending group).
// For the first group, the "previous" group is the last one in the vector.
1631 auto IP
= (I
== BitGroups
.begin()) ?
1632 std::prev(BitGroups
.end()) : std::prev(I
);
1633 if (I
->Repl32
&& IP
->Repl32
&& I
->V
== IP
->V
&& I
->RLAmt
== IP
->RLAmt
&&
1634 I
->StartIdx
== (IP
->EndIdx
+ 1) % 64 && I
!= IP
) {
1636 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
1637 << I
->V
.getNode() << " RLAmt = " << I
->RLAmt
<< " ["
1638 << I
->StartIdx
<< ", " << I
->EndIdx
1639 << "] with group with range [" << IP
->StartIdx
<< ", "
1640 << IP
->EndIdx
<< "]\n");
// Extend the previous group over this one, carry the Repl32CR flag across,
// and drop this group; erase() returns the iterator to continue from.
1642 IP
->EndIdx
= I
->EndIdx
;
1643 IP
->Repl32CR
= IP
->Repl32CR
|| I
->Repl32CR
;
1644 IP
->Repl32Coalesced
= true;
1645 I
= BitGroups
.erase(I
);
1648 // There is a special case worth handling: If there is a single group
1649 // covering the entire upper 32 bits, and it can be merged with both
1650 // the next and previous groups (which might be the same group), then
1651 // do so. If it is the same group (so there will be only one group in
1652 // total), then we need to reverse the order of the range so that it
1653 // covers the entire 64 bits.
1654 if (I
->StartIdx
== 32 && I
->EndIdx
== 63) {
1655 assert(std::next(I
) == BitGroups
.end() &&
1656 "bit group ends at index 63 but there is another?");
// The "next" group after the last one is the first group in the vector.
1657 auto IN
= BitGroups
.begin();
1659 if (IP
->Repl32
&& IN
->Repl32
&& I
->V
== IP
->V
&& I
->V
== IN
->V
&&
1660 (I
->RLAmt
% 32) == IP
->RLAmt
&& (I
->RLAmt
% 32) == IN
->RLAmt
&&
1661 IP
->EndIdx
== 31 && IN
->StartIdx
== 0 && I
!= IP
&&
1664 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I
->V
.getNode()
1665 << " RLAmt = " << I
->RLAmt
<< " [" << I
->StartIdx
1666 << ", " << I
->EndIdx
1667 << "] with 32-bit replicated groups with ranges ["
1668 << IP
->StartIdx
<< ", " << IP
->EndIdx
<< "] and ["
1669 << IN
->StartIdx
<< ", " << IN
->EndIdx
<< "]\n");
1672 // There is only one other group; change it to cover the whole
1673 // range (backward, so that it can still be Repl32 but cover the
1674 // whole 64-bit range).
1677 IP
->Repl32CR
= IP
->Repl32CR
|| I
->RLAmt
>= 32;
1678 IP
->Repl32Coalesced
= true;
1679 I
= BitGroups
.erase(I
);
1681 // There are two separate groups, one before this group and one
1682 // after us (at the beginning). We're going to remove this group,
1683 // but also the group at the very beginning.
1684 IP
->EndIdx
= IN
->EndIdx
;
1685 IP
->Repl32CR
= IP
->Repl32CR
|| IN
->Repl32CR
|| I
->RLAmt
>= 32;
1686 IP
->Repl32Coalesced
= true;
1687 I
= BitGroups
.erase(I
);
1688 BitGroups
.erase(BitGroups
.begin());
1691 // This must be the last group in the vector (and we might have
1692 // just invalidated the iterator above), so break here.
// Wraps Imm as an i32 target constant created through CurDAG at location dl.
1702 SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
1703 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
// Builds a 64-bit mask by scanning Bits: each position is tested with
// hasValue(), and bit i of Mask is set using a 64-bit shift so that
// positions of 32 and above are handled correctly.
1706 uint64_t getZerosMask() {
1708 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1709 if (Bits
[i
].hasValue())
1711 Mask
|= (UINT64_C(1) << i
);
1717 // This method extends an input value to 64 bit if input is 32-bit integer.
1718 // While selecting instructions in BitPermutationSelector in 64-bit mode,
1719 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1720 // In such case, we extend it to 64 bit to be consistent with other values.
// Only 32-bit and 64-bit inputs are expected (see the assert). The 32-bit
// path is built from an IMPLICIT_DEF node and an INSERT_SUBREG node, with
// PPC::sub_32 as the sub-register index.
1721 SDValue
ExtendToInt64(SDValue V
, const SDLoc
&dl
) {
1722 if (V
.getValueSizeInBits() == 64)
1725 assert(V
.getValueSizeInBits() == 32);
1726 SDValue SubRegIdx
= CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
1727 SDValue ImDef
= SDValue(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
,
1729 SDValue ExtVal
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
,
// Counterpart of ExtendToInt64 for the 32-bit selection path. Only 32-bit
// and 64-bit inputs are expected (see the assert). A 64-bit input is
// narrowed with an EXTRACT_SUBREG node that takes the PPC::sub_32
// sub-register and has type i32.
1735 SDValue
TruncateToInt32(SDValue V
, const SDLoc
&dl
) {
1736 if (V
.getValueSizeInBits() == 32)
1739 assert(V
.getValueSizeInBits() == 64);
1740 SDValue SubRegIdx
= CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
1741 SDValue SubVal
= SDValue(CurDAG
->getMachineNode(PPC::EXTRACT_SUBREG
, dl
,
1742 MVT::i32
, V
, SubRegIdx
), 0);
1746 // Depending on the number of groups for a particular value, it might be
1747 // better to rotate, mask explicitly (using andi/andis), and then or the
1748 // result. Select this part of the result first.
// Res accumulates the partial result across iterations. When InstCnt is
// non-null it is incremented by the number of instructions charged for each
// value handled here.
1749 void SelectAndParts32(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
// BPermRewriterNoMasking gates this whole method.
1750 if (BPermRewriterNoMasking
)
1753 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
// Only bits that come from VRI.V with rotation amount VRI.RLAmt are of
// interest when scanning Bits for this entry.
1755 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1756 if (!Bits
[i
].hasValue() || Bits
[i
].getValue() != VRI
.V
)
1758 if (RLAmt
[i
] != VRI
.RLAmt
)
1763 // Compute the masks for andi/andis that would be necessary.
// ANDIMask is the low 16 bits of Mask; ANDISMask is Mask shifted right by 16.
1764 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
1765 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1766 "No set bits in mask for value bit groups");
1767 bool NeedsRotate
= VRI
.RLAmt
!= 0;
1769 // We're trying to minimize the number of instructions. If we have one
1770 // group, using one of andi/andis can break even. If we have three
1771 // groups, we can use both andi and andis and break even (to use both
1772 // andi and andis we also need to or the results together). We need four
1773 // groups if we also need to rotate. To use andi/andis we need to do more
1774 // than break even because rotate-and-mask instructions tend to be easier
1777 // FIXME: We've biased here against using andi/andis, which is right for
1778 // POWER cores, but not optimal everywhere. For example, on the A2,
1779 // andi/andis have single-cycle latency whereas the rotate-and-mask
1780 // instructions take two cycles, and it would be better to bias toward
1781 // andi/andis in break-even cases.
// Cost of the masking approach: one for the rotate if needed, one per
// non-zero mask half, one for the OR joining both halves, and one more when
// Res already holds a value.
1783 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
1784 (unsigned) (ANDIMask
!= 0) +
1785 (unsigned) (ANDISMask
!= 0) +
1786 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0) +
1787 (unsigned) (bool) Res
;
1789 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode()
1790 << " RL: " << VRI
.RLAmt
<< ":"
1791 << "\n\t\t\tisel using masking: " << NumAndInsts
1792 << " using rotates: " << VRI
.NumGroups
<< "\n");
// The masking cost is compared against the number of groups, which is the
// cost reported above for the rotate-based approach.
1794 if (NumAndInsts
>= VRI
.NumGroups
)
1797 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1799 if (InstCnt
) *InstCnt
+= NumAndInsts
;
// Full-width rotate (mask 0..31) of the 32-bit view of the value.
1804 { TruncateToInt32(VRI
.V
, dl
), getI32Imm(VRI
.RLAmt
, dl
),
1805 getI32Imm(0, dl
), getI32Imm(31, dl
) };
1806 VRot
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
1809 VRot
= TruncateToInt32(VRI
.V
, dl
);
1812 SDValue ANDIVal
, ANDISVal
;
1814 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo
, dl
, MVT::i32
,
1815 VRot
, getI32Imm(ANDIMask
, dl
)), 0);
1817 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo
, dl
, MVT::i32
,
1818 VRot
, getI32Imm(ANDISMask
, dl
)), 0);
1822 TotalVal
= ANDISVal
;
1826 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1827 ANDIVal
, ANDISVal
), 0);
1832 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1835 // Now, remove all groups with this underlying value and rotation
// factor.
1837 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1838 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
1843 // Instruction selection for the 32-bit case.
// Returns the node that produces the selected value. When InstCnt is
// non-null it is reset to zero first and then incremented as instructions
// are charged.
1844 SDNode
*Select32(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
1848 if (InstCnt
) *InstCnt
= 0;
1850 // Take care of cases that should use andi/andis first.
1851 SelectAndParts32(dl
, Res
, InstCnt
);
1853 // If we've not yet selected a 'starting' instruction, and we have no zeros
1854 // to fill in, select the (Value, RLAmt) with the highest priority (largest
1855 // number of groups), and start with this rotated value.
1856 if ((!NeedMask
|| LateMask
) && !Res
) {
1857 ValueRotInfo
&VRI
= ValueRotsVec
[0];
1859 if (InstCnt
) *InstCnt
+= 1;
// Full-width rotate (mask 0..31) of the 32-bit view of the value.
1861 { TruncateToInt32(VRI
.V
, dl
), getI32Imm(VRI
.RLAmt
, dl
),
1862 getI32Imm(0, dl
), getI32Imm(31, dl
) };
1863 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
),
1866 Res
= TruncateToInt32(VRI
.V
, dl
);
1869 // Now, remove all groups with this underlying value and rotation factor.
1870 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1871 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
// Each remaining group is charged one instruction.
1875 if (InstCnt
) *InstCnt
+= BitGroups
.size();
1877 // Insert the other groups (one at a time).
// Mask operands are (Bits.size() - EndIdx - 1) and
// (Bits.size() - StartIdx - 1): the group's bounds converted to indices
// counted from the high-order end. RLWINM takes only the source value;
// RLWIMI additionally takes the current Res as its first operand.
1878 for (auto &BG
: BitGroups
) {
1881 { TruncateToInt32(BG
.V
, dl
), getI32Imm(BG
.RLAmt
, dl
),
1882 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
1883 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
1884 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
1887 { Res
, TruncateToInt32(BG
.V
, dl
), getI32Imm(BG
.RLAmt
, dl
),
1888 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
1889 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
1890 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
), 0);
// Masking step: the mask from getZerosMask, truncated to 32 bits, is split
// into its low 16 bits (andi) and its upper 16 bits (andis).
1895 unsigned Mask
= (unsigned) getZerosMask();
1897 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
1898 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1899 "No set bits in zeros mask?");
// One instruction per non-zero half, plus one OR when both halves are used.
1901 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
1902 (unsigned) (ANDISMask
!= 0) +
1903 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
1905 SDValue ANDIVal
, ANDISVal
;
1907 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo
, dl
, MVT::i32
,
1908 Res
, getI32Imm(ANDIMask
, dl
)), 0);
1910 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo
, dl
, MVT::i32
,
1911 Res
, getI32Imm(ANDISMask
, dl
)), 0);
1918 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1919 ANDIVal
, ANDISVal
), 0);
1922 return Res
.getNode();
// Counting companion to SelectRotMask64: it performs the same mask-index
// conversion and tests the same mask shapes (mask ending at bit 63, mask
// starting at bit 0, mask ending at 63 - RLAmt) without creating any nodes.
// NOTE(review): presumably returns the number of instructions SelectRotMask64
// would emit for these arguments; confirm against the full definition.
1925 unsigned SelectRotMask64Count(unsigned RLAmt
, bool Repl32
,
1926 unsigned MaskStart
, unsigned MaskEnd
,
1928 // In the notation used by the instructions, 'start' and 'end' are reversed
1929 // because bits are counted from high to low order.
1930 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1931 InstMaskEnd
= 64 - MaskStart
- 1;
// The first two mask shapes are only considered when IsIns is false.
1936 if ((!IsIns
&& (InstMaskEnd
== 63 || InstMaskStart
== 0)) ||
1937 InstMaskEnd
== 63 - RLAmt
)
1943 // For 64-bit values, not all combinations of rotates and masks are
1944 // available. Produce one if it is available.
// Returns the value produced by rotating V left by RLAmt and keeping the bits
// between MaskStart and MaskEnd (bit 0 being the lowest-order bit). When
// InstCnt is non-null it is incremented once per instruction charged.
1945 SDValue
SelectRotMask64(SDValue V
, const SDLoc
&dl
, unsigned RLAmt
,
1946 bool Repl32
, unsigned MaskStart
, unsigned MaskEnd
,
1947 unsigned *InstCnt
= nullptr) {
1948 // In the notation used by the instructions, 'start' and 'end' are reversed
1949 // because bits are counted from high to low order.
1950 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1951 InstMaskEnd
= 64 - MaskStart
- 1;
1953 if (InstCnt
) *InstCnt
+= 1;
1956 // This rotation amount assumes that the lower 32 bits of the quantity
1957 // are replicated in the high 32 bits by the rotation operator (which is
1958 // done by rlwinm and friends).
1959 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
1960 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
// RLWINM8 takes 32-bit mask indices, hence the "- 32" on both bounds.
1962 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1963 getI32Imm(InstMaskStart
- 32, dl
), getI32Imm(InstMaskEnd
- 32, dl
) };
1964 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM8
, dl
, MVT::i64
,
// Mask ends at instruction bit 63: RLDICL, given only the mask start.
1968 if (InstMaskEnd
== 63) {
1970 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1971 getI32Imm(InstMaskStart
, dl
) };
1972 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Ops
), 0);
// Mask starts at instruction bit 0: RLDICR, given only the mask end.
1975 if (InstMaskStart
== 0) {
1977 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1978 getI32Imm(InstMaskEnd
, dl
) };
1979 return SDValue(CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Ops
), 0);
// Mask end equals 63 - RLAmt: RLDIC, given only the mask start.
1982 if (InstMaskEnd
== 63 - RLAmt
) {
1984 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
1985 getI32Imm(InstMaskStart
, dl
) };
1986 return SDValue(CurDAG
->getMachineNode(PPC::RLDIC
, dl
, MVT::i64
, Ops
), 0);
1989 // We cannot do this with a single instruction, so we'll use two. The
1990 // problem is that we're not free to choose both a rotation amount and mask
1991 // start and end independently. We can choose an arbitrary mask start and
1992 // end, but then the rotation amount is fixed. Rotation, however, can be
1993 // inverted, and so by applying an "inverse" rotation first, we can get the
// The second instruction is charged here; the two recursive calls below do
// not pass InstCnt, so they do not count themselves again.
1995 if (InstCnt
) *InstCnt
+= 1;
1997 // The rotation amount for the second instruction must be MaskStart.
1998 unsigned RLAmt2
= MaskStart
;
1999 // The first instruction must rotate V so that the overall rotation amount
// is RLAmt, i.e. RLAmt1 + RLAmt2 == RLAmt (mod 64).
2001 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
// First a full-width rotate (mask 0..63), then the rotate that applies the
// requested mask.
2003 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
2004 return SelectRotMask64(V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
2007 // For 64-bit values, not all combinations of rotates and masks are
2008 // available. Produce a rotate-mask-and-insert if one is available.
// Rotates V left by RLAmt and inserts bits [MaskStart, MaskEnd] of the result
// into Base, returning the resulting i64 machine-node value. If InstCnt is
// non-null it is advanced by the number of instructions emitted (one or two).
2009 SDValue
SelectRotMaskIns64(SDValue Base
, SDValue V
, const SDLoc
&dl
,
2010 unsigned RLAmt
, bool Repl32
, unsigned MaskStart
,
2011 unsigned MaskEnd
, unsigned *InstCnt
= nullptr) {
2012 // In the notation used by the instructions, 'start' and 'end' are reversed
2013 // because bits are counted from high to low order.
2014 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
2015 InstMaskEnd
= 64 - MaskStart
- 1;
// At least one instruction is always emitted.
2017 if (InstCnt
) *InstCnt
+= 1;
// NOTE(review): the guard for this 32-bit (RLWIMI8) path is not visible here;
// presumably it is taken when Repl32 is set.
2020 // This rotation amount assumes that the lower 32 bits of the quantity
2021 // are replicated in the high 32 bits by the rotation operator (which is
2022 // done by rlwinm and friends).
2023 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
2024 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
// The 32-bit instruction numbers its mask bits 0..31, hence the "- 32".
2026 { ExtendToInt64(Base
, dl
), ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2027 getI32Imm(InstMaskStart
- 32, dl
), getI32Imm(InstMaskEnd
- 32, dl
) };
2028 return SDValue(CurDAG
->getMachineNode(PPC::RLWIMI8
, dl
, MVT::i64
,
// The only single-instruction 64-bit insert form handled here: RLDIMI, whose
// mask end is tied to the rotation amount.
2032 if (InstMaskEnd
== 63 - RLAmt
) {
2034 { ExtendToInt64(Base
, dl
), ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2035 getI32Imm(InstMaskStart
, dl
) };
2036 return SDValue(CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
), 0);
2039 // We cannot do this with a single instruction, so we'll use two. The
2040 // problem is that we're not free to choose both a rotation amount and mask
2041 // start and end independently. We can choose an arbitrary mask start and
2042 // end, but then the rotation amount is fixed. Rotation, however, can be
2043 // inverted, and so by applying an "inverse" rotation first, we can get the
// desired result.
// Account for the second instruction.
2045 if (InstCnt
) *InstCnt
+= 1;
2047 // The rotation amount for the second instruction must be MaskStart.
2048 unsigned RLAmt2
= MaskStart
;
2049 // The first instruction must rotate V so that the overall rotation amount
// is RLAmt (modulo 64).
2051 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
// First a plain full-width rotate of V, then the insert into Base. InstCnt is
// not forwarded: both instructions were already counted above.
2053 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
2054 return SelectRotMaskIns64(Base
, V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
// For every (value, rotation) entry in ValueRotsVec, compares the cost of
// extracting its bit groups with a rotate plus AND-masking against the cost of
// per-group rotate instructions. Where masking is chosen, the masked value is
// OR'ed into Res, the instruction count is added to *InstCnt (if non-null) and
// the handled bit groups are erased from BitGroups.
2057 void SelectAndParts64(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
// NOTE(review): the statement controlled by this test is not visible here;
// presumably the function returns early when masking is disabled.
2058 if (BPermRewriterNoMasking
)
2061 // The idea here is the same as in the 32-bit version, but with additional
2062 // complications from the fact that Repl32 might be true. Because we
2063 // aggressively convert bit groups to Repl32 form (which, for small
2064 // rotation factors, involves no other change), and then coalesce, it might
2065 // be the case that a single 64-bit masking operation could handle both
2066 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2067 // form allowed coalescing, then we must use a 32-bit rotation in order to
2068 // completely capture the new combined bit group.
2070 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
2073 // We need to add to the mask all bits from the associated bit groups.
2074 // If Repl32 is false, we need to add bits from bit groups that have
2075 // Repl32 true, but are trivially convertible to Repl32 false. Such a
2076 // group is trivially convertible if it overlaps only with the lower 32
2077 // bits, and the group has not been coalesced.
// The predicate captures VRI by copy.
2078 auto MatchingBG
= [VRI
](const BitGroup
&BG
) {
2082 unsigned EffRLAmt
= BG
.RLAmt
;
2083 if (!VRI
.Repl32
&& BG
.Repl32
) {
2084 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
<= BG
.EndIdx
&&
2085 !BG
.Repl32Coalesced
) {
2091 } else if (VRI
.Repl32
!= BG
.Repl32
) {
2095 return VRI
.RLAmt
== EffRLAmt
;
// Accumulate into Mask every bit covered by a matching group.
2098 for (auto &BG
: BitGroups
) {
2099 if (!MatchingBG(BG
))
2102 if (BG
.StartIdx
<= BG
.EndIdx
) {
2103 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
)
2104 Mask
|= (UINT64_C(1) << i
);
// Otherwise the group wraps around: StartIdx up to the top bit, then bit 0
// up to EndIdx.
2106 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
)
2107 Mask
|= (UINT64_C(1) << i
);
2108 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
)
2109 Mask
|= (UINT64_C(1) << i
);
2113 // We can use the 32-bit andi/andis technique if the mask does not
2114 // require any higher-order bits. This can save an instruction compared
2115 // to always using the general 64-bit technique.
2116 bool Use32BitInsts
= isUInt
<32>(Mask
);
2117 // Compute the masks for andi/andis that would be necessary.
// ANDIMask is bits 0-15 of Mask, ANDISMask is bits 16-31.
2118 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
2119 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
2121 bool NeedsRotate
= VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
));
// Cost of the masking approach: the optional rotate, plus one OR to merge
// into an already-populated Res, plus the AND sequence added below.
2123 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
2124 (unsigned) (bool) Res
;
// andi and/or andis, plus an OR when both halves are needed.
2126 NumAndInsts
+= (unsigned) (ANDIMask
!= 0) + (unsigned) (ANDISMask
!= 0) +
2127 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
// General 64-bit mask: materialize the constant, then one AND.
2129 NumAndInsts
+= selectI64ImmInstrCount(Mask
) + /* and */ 1;
// Cost of the rotate-only approach over the matching groups.
2131 unsigned NumRLInsts
= 0;
2132 bool FirstBG
= true;
2133 bool MoreBG
= false;
2134 for (auto &BG
: BitGroups
) {
2135 if (!MatchingBG(BG
)) {
2140 SelectRotMask64Count(BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
, BG
.EndIdx
,
2145 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode()
2146 << " RL: " << VRI
.RLAmt
<< (VRI
.Repl32
? " (32):" : ":")
2147 << "\n\t\t\tisel using masking: " << NumAndInsts
2148 << " using rotates: " << NumRLInsts
<< "\n");
2150 // When we'd use andi/andis, we bias toward using the rotates (andi only
2151 // has a record form, and is cracked on POWER cores). However, when using
2152 // general 64-bit constant formation, bias toward the constant form,
2153 // because that exposes more opportunities for CSE.
2154 if (NumAndInsts
> NumRLInsts
)
2156 // When merging multiple bit groups, instruction or is used.
2157 // But when rotate is used, rldimi can insert the rotated value into any
2158 // register, so instruction or can be avoided.
2159 if ((Use32BitInsts
|| MoreBG
) && NumAndInsts
== NumRLInsts
)
2162 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2164 if (InstCnt
) *InstCnt
+= NumAndInsts
;
2167 // We actually need to generate a rotation if we have a non-zero rotation
2168 // factor or, in the Repl32 case, if we care about any of the
2169 // higher-order replicated bits. In the latter case, we generate a mask
2170 // backward so that it actually includes the entire 64 bits.
2171 if (VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
)))
2172 VRot
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
2173 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63);
// Build the masked value in TotalVal.
2178 if (Use32BitInsts
) {
2179 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2180 "No set bits in mask when using 32-bit ands for 64-bit value");
2182 SDValue ANDIVal
, ANDISVal
;
2184 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo8
, dl
, MVT::i64
,
2185 ExtendToInt64(VRot
, dl
),
2186 getI32Imm(ANDIMask
, dl
)),
2189 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo8
, dl
, MVT::i64
,
2190 ExtendToInt64(VRot
, dl
),
2191 getI32Imm(ANDISMask
, dl
)),
2195 TotalVal
= ANDISVal
;
2199 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2200 ExtendToInt64(ANDIVal
, dl
), ANDISVal
), 0);
// 64-bit path: materialize Mask as a constant and AND it with the rotated
// value.
2202 TotalVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
2204 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
2205 ExtendToInt64(VRot
, dl
), TotalVal
),
// Merge with what has been selected so far.
2212 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2213 ExtendToInt64(Res
, dl
), TotalVal
),
2216 // Now, remove all groups with this underlying value and rotation
// factor.
2218 eraseMatchingBitGroups(MatchingBG
);
2222 // Instruction selection for the 64-bit case.
// Builds the machine-node sequence for a 64-bit bit permutation from the
// collected BitGroups / ValueRotsVec and returns the node producing the
// result. If InstCnt is non-null it is reset to zero and then receives the
// number of instructions used.
2223 SDNode
*Select64(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
2227 if (InstCnt
) *InstCnt
= 0;
2229 // Take care of cases that should use andi/andis first.
2230 SelectAndParts64(dl
, Res
, InstCnt
);
2232 // If we've not yet selected a 'starting' instruction, and we have no zeros
2233 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2234 // number of groups), and start with this rotated value.
2235 if ((!NeedMask
|| LateMask
) && !Res
) {
2236 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2237 // groups will come first, and so the VRI representing the largest number
2238 // of groups might not be first (it might be the first Repl32 groups).
2239 unsigned MaxGroupsIdx
= 0;
2240 if (!ValueRotsVec
[0].Repl32
) {
2241 for (unsigned i
= 0, ie
= ValueRotsVec
.size(); i
< ie
; ++i
)
2242 if (ValueRotsVec
[i
].Repl32
) {
2243 if (ValueRotsVec
[i
].NumGroups
> ValueRotsVec
[0].NumGroups
)
2249 ValueRotInfo
&VRI
= ValueRotsVec
[MaxGroupsIdx
];
2250 bool NeedsRotate
= false;
2253 } else if (VRI
.Repl32
) {
// Only groups belonging to this (value, rotation, Repl32) entry are examined.
2254 for (auto &BG
: BitGroups
) {
2255 if (BG
.V
!= VRI
.V
|| BG
.RLAmt
!= VRI
.RLAmt
||
2256 BG
.Repl32
!= VRI
.Repl32
)
2259 // We don't need a rotate if the bit group is confined to the lower
// 32 bits.
// NOTE(review): this test uses StartIdx < EndIdx whereas the analogous test
// in SelectAndParts64 uses <=, so a single-bit group is not treated as
// confined here -- confirm this is intended.
2261 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
< BG
.EndIdx
)
// Start from the rotated value; in the Repl32 case the mask is given
// "backward" (31..30) so that it wraps around.
2270 Res
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
2271 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63,
2276 // Now, remove all groups with this underlying value and rotation factor.
2278 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
2279 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
&&
2280 BG
.Repl32
== VRI
.Repl32
;
2284 // Because 64-bit rotates are more flexible than inserts, we might have a
2285 // preference regarding which one we do first (to save one instruction).
// A group selected by the (partly hidden) comparison below is moved to the
// front of BitGroups unless it is already first.
2287 for (auto I
= BitGroups
.begin(), IE
= BitGroups
.end(); I
!= IE
; ++I
) {
2288 if (SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
2290 SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
2292 if (I
!= BitGroups
.begin()) {
2295 BitGroups
.insert(BitGroups
.begin(), BG
);
2302 // Insert the other groups (one at a time).
// NOTE(review): the condition choosing between the plain rotate-and-mask and
// the rotate-and-insert below is not visible here; presumably the former is
// used only while Res is still empty.
2303 for (auto &BG
: BitGroups
) {
2305 Res
= SelectRotMask64(BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
,
2306 BG
.EndIdx
, InstCnt
);
2308 Res
= SelectRotMaskIns64(Res
, BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
,
2309 BG
.StartIdx
, BG
.EndIdx
, InstCnt
);
// Final masking step: AND the assembled value with the zeros mask.
2313 uint64_t Mask
= getZerosMask();
2315 // We can use the 32-bit andi/andis technique if the mask does not
2316 // require any higher-order bits. This can save an instruction compared
2317 // to always using the general 64-bit technique.
2318 bool Use32BitInsts
= isUInt
<32>(Mask
);
2319 // Compute the masks for andi/andis that would be necessary.
// ANDIMask is bits 0-15 of Mask, ANDISMask is bits 16-31.
2320 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
2321 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
2323 if (Use32BitInsts
) {
2324 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2325 "No set bits in mask when using 32-bit ands for 64-bit value");
// andi and/or andis, plus an OR when both halves are needed.
2327 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
2328 (unsigned) (ANDISMask
!= 0) +
2329 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
2331 SDValue ANDIVal
, ANDISVal
;
2333 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo8
, dl
, MVT::i64
,
2334 ExtendToInt64(Res
, dl
), getI32Imm(ANDIMask
, dl
)), 0);
2336 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo8
, dl
, MVT::i64
,
2337 ExtendToInt64(Res
, dl
), getI32Imm(ANDISMask
, dl
)), 0);
2344 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2345 ExtendToInt64(ANDIVal
, dl
), ANDISVal
), 0);
// General 64-bit mask: materialize the constant, then one AND.
2347 if (InstCnt
) *InstCnt
+= selectI64ImmInstrCount(Mask
) + /* and */ 1;
2349 SDValue MaskVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
2351 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
2352 ExtendToInt64(Res
, dl
), MaskVal
), 0);
2356 return Res
.getNode();
// Runs one selection attempt with the given masking strategy: collects the
// bit groups and per-value rotation info, then dispatches on the bit width to
// Select32 or Select64. InstCnt, when non-null, is passed through to receive
// the instruction count.
2359 SDNode
*Select(SDNode
*N
, bool LateMask
, unsigned *InstCnt
= nullptr) {
2360 // Fill in BitGroups.
2361 collectBitGroups(LateMask
);
// NOTE(review): the statement controlled by this test is not visible here;
// presumably selection is abandoned when there are no bit groups.
2362 if (BitGroups
.empty())
2365 // For 64-bit values, figure out when we can use 32-bit instructions.
2366 if (Bits
.size() == 64)
2367 assignRepl32BitGroups();
2369 // Fill in ValueRotsVec.
2370 collectValueRotInfo();
2372 if (Bits
.size() == 32) {
2373 return Select32(N
, LateMask
, InstCnt
);
// Only 32-bit and 64-bit values are supported.
2375 assert(Bits
.size() == 64 && "Not 64 bits here?");
2376 return Select64(N
, LateMask
, InstCnt
);
// Removes from BitGroups every group for which the predicate F returns true.
2382 void eraseMatchingBitGroups(function_ref
<bool(const BitGroup
&)> F
) {
// remove_if moves the groups to keep to the front; erase drops the tail.
2383 BitGroups
.erase(remove_if(BitGroups
, F
), BitGroups
.end());
// Per-bit description of the value being selected; assigned from the result
// of getValueBits() in Select(SDNode *) and expected to hold 32 or 64 entries.
2386 SmallVector
<ValueBit
, 64> Bits
;
// Rotation amounts; filled in by computeRotationAmounts().
// NOTE(review): presumably one entry per element of Bits -- confirm.
2389 SmallVector
<unsigned, 64> RLAmt
;
// Bit groups still to be selected; filled by collectBitGroups() and pruned
// with eraseMatchingBitGroups() as groups are handled.
2391 SmallVector
<BitGroup
, 16> BitGroups
;
// Rotation info keyed by an (SDValue, unsigned) pair.
// NOTE(review): presumably the unsigned is the rotation amount -- confirm.
2393 DenseMap
<std::pair
<SDValue
, unsigned>, ValueRotInfo
> ValueRots
;
// The rotation info in vector form; filled by collectValueRotInfo() and
// iterated by the selection routines.
2394 SmallVector
<ValueRotInfo
, 16> ValueRotsVec
;
// DAG used to create all machine nodes.
2396 SelectionDAG
*CurDAG
;
// Creates a selector that operates on the given DAG.
// NOTE(review): the initializer list/body is not visible here; presumably it
// stores DAG in CurDAG -- confirm.
2399 BitPermutationSelector(SelectionDAG
*DAG
)
2402 // Here we try to match complex bit permutations into a set of
2403 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2404 // known to produce optimal code for common cases (like i32 byte swapping).
// Entry point: analyses the bits of N's first result, then tries both the
// early-masking and late-masking strategies and keeps the one that needs
// fewer instructions (early masking wins ties).
2405 SDNode
*Select(SDNode
*N
) {
// Determine, for each bit of result 0 of N, where it comes from.
2408 getValueBits(SDValue(N
, 0), N
->getValueType(0).getSizeInBits());
2411 Bits
= std::move(*Result
.second
);
2413 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2414 " selection for: ");
2415 LLVM_DEBUG(N
->dump(CurDAG
));
2417 // Fill in RLAmt and set NeedMask.
2418 computeRotationAmounts();
// NOTE(review): the guard for this early return is not visible here;
// presumably it applies when no masking is needed.
2421 return Select(N
, false);
2423 // We currently have two techniques for handling results with zeros: early
2424 // masking (the default) and late masking. Late masking is sometimes more
2425 // efficient, but because the structure of the bit groups is different, it
2426 // is hard to tell without generating both and comparing the results. With
2427 // late masking, we ignore zeros in the resulting value when inserting each
2428 // set of bit groups, and then mask in the zeros at the end. With early
2429 // masking, we only insert the non-zero parts of the result at every step.
2431 unsigned InstCnt
= 0, InstCntLateMask
= 0;
2432 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2433 SDNode
*RN
= Select(N
, false, &InstCnt
);
2434 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt
<< " instructions\n");
2436 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2437 SDNode
*RNLM
= Select(N
, true, &InstCntLateMask
);
2438 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2439 << " instructions\n");
// Prefer early masking unless late masking is strictly cheaper.
2441 if (InstCnt
<= InstCntLateMask
) {
2442 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2446 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
// Selects zero/sign extensions of comparison results, and logical operations
// on such results, into GPR instruction sequences. Only used on 64-bit
// targets (asserted in the constructor).
2451 class IntegerCompareEliminator
{
// DAG used to create all machine nodes.
2452 SelectionDAG
*CurDAG
;
2454 // Conversion type for interpreting results of a 32-bit instruction as
2455 // a 64-bit value or vice versa.
2456 enum ExtOrTruncConversion
{ Ext
, Trunc
};
2458 // Modifiers to guide how an ISD::SETCC node's result is to be computed
// in a GPR.
2460 // ZExtOrig - use the original condition code, zero-extend value
2461 // ZExtInvert - invert the condition code, zero-extend value
2462 // SExtOrig - use the original condition code, sign-extend value
2463 // SExtInvert - invert the condition code, sign-extend value
2464 enum SetccInGPROpts
{ ZExtOrig
, ZExtInvert
, SExtOrig
, SExtInvert
};
2466 // Comparisons against zero to emit GPR code sequences for. Each of these
2467 // sequences may need to be emitted for two or more equivalent patterns.
2468 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2469 // matters as well as the extension type: sext (-1/0), zext (1/0).
2470 // GEZExt - (zext (LHS >= 0))
2471 // GESExt - (sext (LHS >= 0))
2472 // LEZExt - (zext (LHS <= 0))
2473 // LESExt - (sext (LHS <= 0))
2474 enum ZeroCompare
{ GEZExt
, GESExt
, LEZExt
, LESExt
};
// Helpers; the definitions follow the class.
2476 SDNode
*tryEXTEND(SDNode
*N
);
2477 SDNode
*tryLogicOpOfCompares(SDNode
*N
);
2478 SDValue
computeLogicOpInGPR(SDValue LogicOp
);
2479 SDValue
signExtendInputIfNeeded(SDValue Input
);
2480 SDValue
zeroExtendInputIfNeeded(SDValue Input
);
2481 SDValue
addExtOrTrunc(SDValue NatWidthRes
, ExtOrTruncConversion Conv
);
2482 SDValue
getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
2484 SDValue
get32BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2485 int64_t RHSValue
, SDLoc dl
);
2486 SDValue
get32BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2487 int64_t RHSValue
, SDLoc dl
);
2488 SDValue
get64BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2489 int64_t RHSValue
, SDLoc dl
);
2490 SDValue
get64BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2491 int64_t RHSValue
, SDLoc dl
);
2492 SDValue
getSETCCInGPR(SDValue Compare
, SetccInGPROpts ConvOpts
);
// Stores the DAG and the owning instruction selector, and checks that the
// target's pointer type is 64 bits wide.
2495 IntegerCompareEliminator(SelectionDAG
*DAG
,
2496 PPCDAGToDAGISel
*Sel
) : CurDAG(DAG
), S(Sel
) {
2497 assert(CurDAG
->getTargetLoweringInfo()
2498 .getPointerTy(CurDAG
->getDataLayout()).getSizeInBits() == 64 &&
2499 "Only expecting to use this on 64 bit targets.");
// Entry point: dispatches on N's opcode, subject to the CmpInGPR setting.
// Extension nodes go to tryEXTEND; the remaining visible path goes to
// tryLogicOpOfCompares.
// NOTE(review): the statements controlled by the CmpInGPR tests and the case
// labels preceding tryLogicOpOfCompares are not visible here; presumably the
// tests bail out when the requested mode excludes the node.
2501 SDNode
*Select(SDNode
*N
) {
2502 if (CmpInGPR
== ICGPR_None
)
2504 switch (N
->getOpcode()) {
2506 case ISD::ZERO_EXTEND
:
2507 if (CmpInGPR
== ICGPR_Sext
|| CmpInGPR
== ICGPR_SextI32
||
2508 CmpInGPR
== ICGPR_SextI64
)
2511 case ISD::SIGN_EXTEND
:
2512 if (CmpInGPR
== ICGPR_Zext
|| CmpInGPR
== ICGPR_ZextI32
||
2513 CmpInGPR
== ICGPR_ZextI64
)
2515 return tryEXTEND(N
);
2519 return tryLogicOpOfCompares(N
);
// Returns true if Opc is one of the ISD opcodes AND, OR or XOR.
2525 static bool isLogicOp(unsigned Opc
) {
2526 return Opc
== ISD::AND
|| Opc
== ISD::OR
|| Opc
== ISD::XOR
;
2528 // The obvious case for wanting to keep the value in a GPR. Namely, the
2529 // result of the comparison is actually needed in a GPR.
// N must be a ZERO_EXTEND or SIGN_EXTEND node. Computes the extended operand
// in a GPR and returns the node producing it, converted to N's result width
// where the widths differ.
2530 SDNode
*IntegerCompareEliminator::tryEXTEND(SDNode
*N
) {
2531 assert((N
->getOpcode() == ISD::ZERO_EXTEND
||
2532 N
->getOpcode() == ISD::SIGN_EXTEND
) &&
2533 "Expecting a zero/sign extend node!");
2535 // If we are zero-extending the result of a logical operation on i1
2536 // values, we can keep the values in GPRs.
2537 if (isLogicOp(N
->getOperand(0).getOpcode()) &&
2538 N
->getOperand(0).getValueType() == MVT::i1
&&
2539 N
->getOpcode() == ISD::ZERO_EXTEND
)
2540 WideRes
= computeLogicOpInGPR(N
->getOperand(0));
// NOTE(review): the statement controlled by this test is not visible here;
// presumably operands other than a SETCC are rejected.
2541 else if (N
->getOperand(0).getOpcode() != ISD::SETCC
)
// SETCC operand: compute it with the extension kind that matches N.
2545 getSETCCInGPR(N
->getOperand(0),
2546 N
->getOpcode() == ISD::SIGN_EXTEND
?
2547 SetccInGPROpts::SExtOrig
: SetccInGPROpts::ZExtOrig
);
2553 bool Input32Bit
= WideRes
.getValueType() == MVT::i32
;
2554 bool Output32Bit
= N
->getValueType(0) == MVT::i32
;
// Statistics: exactly one of the two counters is bumped per node.
2556 NumSextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 1 : 0;
2557 NumZextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 0 : 1;
// Reinterpret the result at the other width when the computed width and the
// requested width differ.
2559 SDValue ConvOp
= WideRes
;
2560 if (Input32Bit
!= Output32Bit
)
2561 ConvOp
= addExtOrTrunc(WideRes
, Input32Bit
? ExtOrTruncConversion::Ext
:
2562 ExtOrTruncConversion::Trunc
);
2563 return ConvOp
.getNode();
2566 // Attempt to perform logical operations on the results of comparisons while
2567 // keeping the values in GPRs. Without doing so, these would end up being
2568 // lowered to CR-logical operations which suffer from significant latency and
// low ILP.
// N is a logic operation with an i1 result. Lowers it to a GPR sequence,
// re-emits the relevant instruction in record form and returns a node that
// extracts the resulting bit from CR0.
2570 SDNode
*IntegerCompareEliminator::tryLogicOpOfCompares(SDNode
*N
) {
// NOTE(review): the statement controlled by this test is not visible here;
// presumably nodes that do not produce an i1 are rejected.
2571 if (N
->getValueType(0) != MVT::i1
)
2573 assert(isLogicOp(N
->getOpcode()) &&
2574 "Expected a logic operation on setcc results.");
2575 SDValue LoweredLogical
= computeLogicOpInGPR(SDValue(N
, 0));
2576 if (!LoweredLogical
)
// An XORI8 result is treated as a bitwise negation; it reads the EQ bit of
// CR0, every other result reads the GT bit.
2580 bool IsBitwiseNegate
= LoweredLogical
.getMachineOpcode() == PPC::XORI8
;
2581 unsigned SubRegToExtract
= IsBitwiseNegate
? PPC::sub_eq
: PPC::sub_gt
;
2582 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
2583 SDValue LHS
= LoweredLogical
.getOperand(0);
2584 SDValue RHS
= LoweredLogical
.getOperand(1);
2586 SDValue OpToConvToRecForm
;
2588 // Look through any 32-bit to 64-bit implicit extend nodes to find the
2589 // opcode that is input to the XORI.
2590 if (IsBitwiseNegate
&&
2591 LoweredLogical
.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG
)
2592 OpToConvToRecForm
= LoweredLogical
.getOperand(0).getOperand(1);
2593 else if (IsBitwiseNegate
)
2594 // If the input to the XORI isn't an extension, that's what we're after.
2595 OpToConvToRecForm
= LoweredLogical
.getOperand(0);
2597 // If this is not an XORI, it is a reg-reg logical op and we can convert
2598 // it to record-form.
2599 OpToConvToRecForm
= LoweredLogical
;
2601 // Get the record-form version of the node we're looking to use to get the
// CR result from.
// NewOpc is compared against -1 below to detect that no record form exists.
2603 uint16_t NonRecOpc
= OpToConvToRecForm
.getMachineOpcode();
2604 int NewOpc
= PPCInstrInfo::getRecordFormOpcode(NonRecOpc
);
2606 // Convert the right node to record-form. This is either the logical we're
2607 // looking at or it is the input node to the negation (if we're looking at
2608 // a bitwise negation).
2609 if (NewOpc
!= -1 && IsBitwiseNegate
) {
2610 // The input to the XORI has a record-form. Use it.
2611 assert(LoweredLogical
.getConstantOperandVal(1) == 1 &&
2612 "Expected a PPC::XORI8 only for bitwise negation.");
2613 // Emit the record-form instruction.
// Re-use all operands of the non-record node.
2614 std::vector
<SDValue
> Ops
;
2615 for (int i
= 0, e
= OpToConvToRecForm
.getNumOperands(); i
< e
; i
++)
2616 Ops
.push_back(OpToConvToRecForm
.getOperand(i
));
// The extra Glue result ties the CR0 read below to this instruction.
2619 SDValue(CurDAG
->getMachineNode(NewOpc
, dl
,
2620 OpToConvToRecForm
.getValueType(),
2621 MVT::Glue
, Ops
), 0);
2623 assert((NewOpc
!= -1 || !IsBitwiseNegate
) &&
2624 "No record form available for AND8/OR8/XOR8?");
// Falls back to ANDIo8 on LHS/RHS when there is no record form.
2626 SDValue(CurDAG
->getMachineNode(NewOpc
== -1 ? PPC::ANDIo8
: NewOpc
, dl
,
2627 MVT::i64
, MVT::Glue
, LHS
, RHS
), 0);
2630 // Select this node to a single bit from CR0 set by the record-form node
2631 // just created. For bitwise negation, use the EQ bit which is the equivalent
2632 // of negating the result (i.e. it is a bit set when the result of the
2633 // operation is zero).
2635 CurDAG
->getTargetConstant(SubRegToExtract
, dl
, MVT::i32
);
// The glue value (result 1) of the record-form node is passed as an operand.
2637 SDValue(CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
, dl
,
2638 MVT::i1
, CR0Reg
, SRIdxVal
,
2639 WideOp
.getValue(1)), 0);
2640 return CRBit
.getNode();
2643 // Lower a logical operation on i1 values into a GPR sequence if possible.
2644 // The result can be kept in a GPR if requested.
2645 // Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
2648 // - Logical operation (AND/OR/XOR)
2649 // There is also a special case that is handled (namely a complement operation
2650 // achieved with xor %a, -1).
// LogicOp must be an AND/OR/XOR producing an i1. Returns an i64 machine node
// computing it in a GPR.
// NOTE(review): the statement taken when an operand cannot be lowered is not
// visible here; presumably an empty SDValue is returned.
2651 SDValue
IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp
) {
2652 assert(isLogicOp(LogicOp
.getOpcode()) &&
2653 "Can only handle logic operations here.");
2654 assert(LogicOp
.getValueType() == MVT::i1
&&
2655 "Can only handle logic operations on i1 values here.");
2659 // Special case: xor %a, -1
2660 bool IsBitwiseNegation
= isBitwiseNot(LogicOp
);
2662 // Produces a GPR sequence for each operand of the binary logic operation.
2663 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2664 // the value in a GPR and for logic operations, it will recursively produce
2665 // a GPR sequence for the operation.
2666 auto getLogicOperand
= [&] (SDValue Operand
) -> SDValue
{
2667 unsigned OperandOpcode
= Operand
.getOpcode();
2668 if (OperandOpcode
== ISD::SETCC
)
2669 return getSETCCInGPR(Operand
, SetccInGPROpts::ZExtOrig
);
2670 else if (OperandOpcode
== ISD::TRUNCATE
) {
2671 SDValue InputOp
= Operand
.getOperand(0);
2672 EVT InVT
= InputOp
.getValueType();
// Rotate by 0 with a mask starting at bit 63, i.e. keep only the
// least-significant bit of the truncated input.
2673 return SDValue(CurDAG
->getMachineNode(InVT
== MVT::i32
? PPC::RLDICL_32
:
2674 PPC::RLDICL
, dl
, InVT
, InputOp
,
2675 S
->getI64Imm(0, dl
),
2676 S
->getI64Imm(63, dl
)), 0);
2677 } else if (isLogicOp(OperandOpcode
))
2678 return computeLogicOpInGPR(Operand
);
2681 LHS
= getLogicOperand(LogicOp
.getOperand(0));
2682 RHS
= getLogicOperand(LogicOp
.getOperand(1));
2684 // If a GPR sequence can't be produced for the LHS we can't proceed.
2685 // Not producing a GPR sequence for the RHS is only a problem if this isn't
2686 // a bitwise negation operation.
2687 if (!LHS
|| (!RHS
&& !IsBitwiseNegation
))
// Statistic: count every logic operation lowered this way.
2690 NumLogicOpsOnComparison
++;
2692 // We will use the inputs as 64-bit values.
2693 if (LHS
.getValueType() == MVT::i32
)
2694 LHS
= addExtOrTrunc(LHS
, ExtOrTruncConversion::Ext
);
2695 if (!IsBitwiseNegation
&& RHS
.getValueType() == MVT::i32
)
2696 RHS
= addExtOrTrunc(RHS
, ExtOrTruncConversion::Ext
);
// Map the ISD opcode onto the corresponding 64-bit PPC instruction.
2699 switch (LogicOp
.getOpcode()) {
2700 default: llvm_unreachable("Unknown logic operation.");
2701 case ISD::AND
: NewOpc
= PPC::AND8
; break;
2702 case ISD::OR
: NewOpc
= PPC::OR8
; break;
2703 case ISD::XOR
: NewOpc
= PPC::XOR8
; break;
// A bitwise negation is emitted as XORI8 with an immediate of 1, replacing
// whatever RHS was computed above.
2706 if (IsBitwiseNegation
) {
2707 RHS
= S
->getI64Imm(1, dl
);
2708 NewOpc
= PPC::XORI8
;
2711 return SDValue(CurDAG
->getMachineNode(NewOpc
, dl
, MVT::i64
, LHS
, RHS
), 0);
2715 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2716 /// Otherwise just reinterpret it as a 64-bit value.
2717 /// Useful when emitting comparison code for 32-bit values without using
2718 /// the compare instruction (which only considers the lower 32-bits).
// Input must be an i32 value. Returns an i64 value: a plain reinterpretation
// (addExtOrTrunc with Ext) in the cases recognised below, otherwise an
// explicit EXTSW_32_64.
2719 SDValue
IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input
) {
2720 assert(Input
.getValueType() == MVT::i32
&&
2721 "Can only sign-extend 32-bit values here.");
2722 unsigned Opc
= Input
.getOpcode();
2724 // The value was sign extended and then truncated to 32-bits. No need to
2725 // sign extend it again.
2726 if (Opc
== ISD::TRUNCATE
&&
2727 (Input
.getOperand(0).getOpcode() == ISD::AssertSext
||
2728 Input
.getOperand(0).getOpcode() == ISD::SIGN_EXTEND
))
2729 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2731 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
2732 // The input is a sign-extending load. All ppc sign-extending loads
2733 // sign-extend to the full 64-bits.
2734 if (InputLoad
&& InputLoad
->getExtensionType() == ISD::SEXTLOAD
)
2735 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2737 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
2738 // We don't sign-extend constants.
// NOTE(review): the test guarding this return is not visible here; presumably
// it checks InputConst.
2740 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
// None of the shortcuts applied: emit a real sign extension and count it.
2743 SignExtensionsAdded
++;
2744 return SDValue(CurDAG
->getMachineNode(PPC::EXTSW_32_64
, dl
,
2745 MVT::i64
, Input
), 0);
2748 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2749 /// Otherwise just reinterpret it as a 64-bit value.
2750 /// Useful when emitting comparison code for 32-bit values without using
2751 /// the compare instruction (which only considers the lower 32-bits).
// Input must be an i32 value. Returns an i64 value: a plain reinterpretation
// (addExtOrTrunc with Ext) in the cases recognised below, otherwise an
// explicit RLDICL_32_64.
2752 SDValue
IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input
) {
2753 assert(Input
.getValueType() == MVT::i32
&&
2754 "Can only zero-extend 32-bit values here.");
2755 unsigned Opc
= Input
.getOpcode();
2757 // The only condition under which we can omit the actual extend instruction:
2758 // - The value is a positive constant
2759 // - The value comes from a load that isn't a sign-extending load
2760 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2761 bool IsTruncateOfZExt
= Opc
== ISD::TRUNCATE
&&
2762 (Input
.getOperand(0).getOpcode() == ISD::AssertZext
||
2763 Input
.getOperand(0).getOpcode() == ISD::ZERO_EXTEND
);
2764 if (IsTruncateOfZExt
)
2765 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
// Constants whose sign-extended value is non-negative (zero included).
2767 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
2768 if (InputConst
&& InputConst
->getSExtValue() >= 0)
2769 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2771 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
2772 // The input is a load that doesn't sign-extend (it will be zero-extended).
2773 if (InputLoad
&& InputLoad
->getExtensionType() != ISD::SEXTLOAD
)
2774 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2776 // None of the above, need to zero-extend.
2778 ZeroExtensionsAdded
++;
// Rotate by 0 with a mask starting at bit 32, i.e. clear the upper 32 bits.
2779 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL_32_64
, dl
, MVT::i64
, Input
,
2780 S
->getI64Imm(0, dl
),
2781 S
->getI64Imm(32, dl
)), 0);
2784 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2785 // course not actual zero/sign extensions that will generate machine code,
2786 // they're just a way to reinterpret a 32 bit value in a register as a
2787 // 64 bit value and vice-versa.
// Reinterprets NatWidthRes at the other register width via the sub_32
// subregister: Ext yields an i64 INSERT_SUBREG into an IMPLICIT_DEF, Trunc
// yields an i32 EXTRACT_SUBREG.
2788 SDValue
IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes
,
2789 ExtOrTruncConversion Conv
) {
2790 SDLoc
dl(NatWidthRes
);
2792 // For reinterpreting 32-bit values as 64 bit values, we generate
2793 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2794 if (Conv
== ExtOrTruncConversion::Ext
) {
// The upper half comes from an IMPLICIT_DEF, so it is left undefined.
2795 SDValue
ImDef(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
, MVT::i64
), 0);
2797 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
2798 return SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
, MVT::i64
,
2799 ImDef
, NatWidthRes
, SubRegIdx
), 0);
2802 assert(Conv
== ExtOrTruncConversion::Trunc
&&
2803 "Unknown convertion between 32 and 64 bit values.");
2804 // For reinterpreting 64-bit values as 32-bit values, we just need to
2805 // EXTRACT_SUBREG (i.e. extract the low word).
2807 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
2808 return SDValue(CurDAG
->getMachineNode(PPC::EXTRACT_SUBREG
, dl
, MVT::i32
,
2809 NatWidthRes
, SubRegIdx
), 0);
2812 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2813 // Handle both zero-extensions and sign-extensions.
2815 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
2816 ZeroCompare CmpTy
) {
2817 EVT InVT
= LHS
.getValueType();
2818 bool Is32Bit
= InVT
== MVT::i32
;
2821 // Produce the value that needs to be either zero or sign extended.
2823 case ZeroCompare::GEZExt
:
2824 case ZeroCompare::GESExt
:
2825 ToExtend
= SDValue(CurDAG
->getMachineNode(Is32Bit
? PPC::NOR
: PPC::NOR8
,
2826 dl
, InVT
, LHS
, LHS
), 0);
2828 case ZeroCompare::LEZExt
:
2829 case ZeroCompare::LESExt
: {
2831 // Upper 32 bits cannot be undefined for this sequence.
2832 LHS
= signExtendInputIfNeeded(LHS
);
2834 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
2836 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2837 Neg
, S
->getI64Imm(1, dl
),
2838 S
->getI64Imm(63, dl
)), 0);
2841 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
2842 S
->getI64Imm(~0ULL, dl
)), 0);
2843 ToExtend
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2850 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2852 (CmpTy
== ZeroCompare::GEZExt
|| CmpTy
== ZeroCompare::LEZExt
))
2853 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2854 ToExtend
, S
->getI64Imm(1, dl
),
2855 S
->getI64Imm(63, dl
)), 0);
2857 (CmpTy
== ZeroCompare::GESExt
|| CmpTy
== ZeroCompare::LESExt
))
2858 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, ToExtend
,
2859 S
->getI64Imm(63, dl
)), 0);
2861 assert(Is32Bit
&& "Should have handled the 32-bit sequences above.");
2862 // For 32-bit sequences, the extensions differ between GE/LE cases.
2864 case ZeroCompare::GEZExt
: {
2865 SDValue ShiftOps
[] = { ToExtend
, S
->getI32Imm(1, dl
), S
->getI32Imm(31, dl
),
2866 S
->getI32Imm(31, dl
) };
2867 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2870 case ZeroCompare::GESExt
:
2871 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, ToExtend
,
2872 S
->getI32Imm(31, dl
)), 0);
2873 case ZeroCompare::LEZExt
:
2874 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, ToExtend
,
2875 S
->getI32Imm(1, dl
)), 0);
2876 case ZeroCompare::LESExt
:
2877 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, ToExtend
,
2878 S
->getI32Imm(-1, dl
)), 0);
2881 // The switch above covers all the enumerators, so it has no default clause
2882 // (adding one would cause compiler warnings).
2883 llvm_unreachable("Unknown zero-comparison type.");
2886 /// Produces a zero-extended result of comparing two 32-bit values according to
2887 /// the passed condition code.
2889 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS
, SDValue RHS
,
2891 int64_t RHSValue
, SDLoc dl
) {
2892 if (CmpInGPR
== ICGPR_I64
|| CmpInGPR
== ICGPR_SextI64
||
2893 CmpInGPR
== ICGPR_ZextI64
|| CmpInGPR
== ICGPR_Sext
)
2895 bool IsRHSZero
= RHSValue
== 0;
2896 bool IsRHSOne
= RHSValue
== 1;
2897 bool IsRHSNegOne
= RHSValue
== -1LL;
2899 default: return SDValue();
2901 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2902 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
2903 SDValue Xor
= IsRHSZero
? LHS
:
2904 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2906 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2907 SDValue ShiftOps
[] = { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
),
2908 S
->getI32Imm(31, dl
) };
2909 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2913 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2914 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
2915 SDValue Xor
= IsRHSZero
? LHS
:
2916 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2918 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2919 SDValue ShiftOps
[] = { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
),
2920 S
->getI32Imm(31, dl
) };
2922 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
2923 return SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
2924 S
->getI32Imm(1, dl
)), 0);
2927 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2928 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
2930 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
2932 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2933 // by swapping inputs and falling through.
2934 std::swap(LHS
, RHS
);
2935 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2936 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2940 if (CmpInGPR
== ICGPR_NonExtIn
)
2942 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2943 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
2945 if (CmpInGPR
== ICGPR_NonExtIn
)
2947 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
2950 // The upper 32-bits of the register can't be undefined for this sequence.
2951 LHS
= signExtendInputIfNeeded(LHS
);
2952 RHS
= signExtendInputIfNeeded(RHS
);
2954 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
2956 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Sub
,
2957 S
->getI64Imm(1, dl
), S
->getI64Imm(63, dl
)),
2960 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
,
2961 MVT::i64
, Shift
, S
->getI32Imm(1, dl
)), 0);
2964 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2965 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2966 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
2967 // Handle SETGT -1 (which is equivalent to SETGE 0).
2969 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
2972 if (CmpInGPR
== ICGPR_NonExtIn
)
2974 // The upper 32-bits of the register can't be undefined for this sequence.
2975 LHS
= signExtendInputIfNeeded(LHS
);
2976 RHS
= signExtendInputIfNeeded(RHS
);
2978 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
2979 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2980 Neg
, S
->getI32Imm(1, dl
), S
->getI32Imm(63, dl
)), 0);
2982 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2983 // (%b < %a) by swapping inputs and falling through.
2984 std::swap(LHS
, RHS
);
2985 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2986 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2987 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
2991 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2992 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
2993 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
2994 // Handle SETLT 1 (which is equivalent to SETLE 0).
2996 if (CmpInGPR
== ICGPR_NonExtIn
)
2998 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3002 SDValue ShiftOps
[] = { LHS
, S
->getI32Imm(1, dl
), S
->getI32Imm(31, dl
),
3003 S
->getI32Imm(31, dl
) };
3004 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
3008 if (CmpInGPR
== ICGPR_NonExtIn
)
3010 // The upper 32-bits of the register can't be undefined for this sequence.
3011 LHS
= signExtendInputIfNeeded(LHS
);
3012 RHS
= signExtendInputIfNeeded(RHS
);
3014 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3015 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3016 SUBFNode
, S
->getI64Imm(1, dl
),
3017 S
->getI64Imm(63, dl
)), 0);
3020 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %a, %b), 63), 1)
3021 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %b, %a), 63), 1)
3022 std::swap(LHS
, RHS
);
3025 if (CmpInGPR
== ICGPR_NonExtIn
)
3027 // The upper 32-bits of the register can't be undefined for this sequence.
3028 LHS
= zeroExtendInputIfNeeded(LHS
);
3029 RHS
= zeroExtendInputIfNeeded(RHS
);
3031 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3033 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3034 Subtract
, S
->getI64Imm(1, dl
),
3035 S
->getI64Imm(63, dl
)), 0);
3036 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, SrdiNode
,
3037 S
->getI32Imm(1, dl
)), 0);
3040 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3041 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3042 std::swap(LHS
, RHS
);
3045 if (CmpInGPR
== ICGPR_NonExtIn
)
3047 // The upper 32-bits of the register can't be undefined for this sequence.
3048 LHS
= zeroExtendInputIfNeeded(LHS
);
3049 RHS
= zeroExtendInputIfNeeded(RHS
);
3051 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3052 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3053 Subtract
, S
->getI64Imm(1, dl
),
3054 S
->getI64Imm(63, dl
)), 0);
3059 /// Produces a sign-extended result of comparing two 32-bit values according to
3060 /// the passed condition code.
3062 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS
, SDValue RHS
,
3064 int64_t RHSValue
, SDLoc dl
) {
3065 if (CmpInGPR
== ICGPR_I64
|| CmpInGPR
== ICGPR_SextI64
||
3066 CmpInGPR
== ICGPR_ZextI64
|| CmpInGPR
== ICGPR_Zext
)
3068 bool IsRHSZero
= RHSValue
== 0;
3069 bool IsRHSOne
= RHSValue
== 1;
3070 bool IsRHSNegOne
= RHSValue
== -1LL;
3073 default: return SDValue();
3075 // (sext (setcc %a, %b, seteq)) ->
3076 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3077 // (sext (setcc %a, 0, seteq)) ->
3078 // (ashr (shl (ctlz %a), 58), 63)
3079 SDValue CountInput
= IsRHSZero
? LHS
:
3080 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
3082 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, CountInput
), 0);
3083 SDValue SHLOps
[] = { Cntlzw
, S
->getI32Imm(27, dl
),
3084 S
->getI32Imm(5, dl
), S
->getI32Imm(31, dl
) };
3086 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, SHLOps
), 0);
3087 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Slwi
), 0);
3090 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3091 // flip the bit, finally take 2's complement.
3092 // (sext (setcc %a, %b, setne)) ->
3093 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3094 // Same as above, but the first xor is not needed.
3095 // (sext (setcc %a, 0, setne)) ->
3096 // (neg (xor (lshr (ctlz %a), 5), 1))
3097 SDValue Xor
= IsRHSZero
? LHS
:
3098 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
3100 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
3101 SDValue ShiftOps
[] =
3102 { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
), S
->getI32Imm(31, dl
) };
3104 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
3106 SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
3107 S
->getI32Imm(1, dl
)), 0);
3108 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Xori
), 0);
3111 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3112 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3114 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3116 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3117 // by swapping inputs and falling through.
3118 std::swap(LHS
, RHS
);
3119 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3120 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3124 if (CmpInGPR
== ICGPR_NonExtIn
)
3126 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3127 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3129 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3131 // The upper 32-bits of the register can't be undefined for this sequence.
3132 LHS
= signExtendInputIfNeeded(LHS
);
3133 RHS
= signExtendInputIfNeeded(RHS
);
3135 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, MVT::Glue
,
3138 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3139 SUBFNode
, S
->getI64Imm(1, dl
),
3140 S
->getI64Imm(63, dl
)), 0);
3141 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Srdi
,
3142 S
->getI32Imm(-1, dl
)), 0);
3145 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3146 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3147 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3149 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3151 if (CmpInGPR
== ICGPR_NonExtIn
)
3153 // The upper 32-bits of the register can't be undefined for this sequence.
3154 LHS
= signExtendInputIfNeeded(LHS
);
3155 RHS
= signExtendInputIfNeeded(RHS
);
3157 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
3158 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, Neg
,
3159 S
->getI64Imm(63, dl
)), 0);
3161 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3162 // (%b < %a) by swapping inputs and falling through.
3163 std::swap(LHS
, RHS
);
3164 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3165 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3166 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3170 // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3171 // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3172 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
3174 if (CmpInGPR
== ICGPR_NonExtIn
)
3176 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3179 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, LHS
,
3180 S
->getI32Imm(31, dl
)), 0);
3182 if (CmpInGPR
== ICGPR_NonExtIn
)
3184 // The upper 32-bits of the register can't be undefined for this sequence.
3185 LHS
= signExtendInputIfNeeded(LHS
);
3186 RHS
= signExtendInputIfNeeded(RHS
);
3188 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3189 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3190 SUBFNode
, S
->getI64Imm(63, dl
)), 0);
3193 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3194 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3195 std::swap(LHS
, RHS
);
3198 if (CmpInGPR
== ICGPR_NonExtIn
)
3200 // The upper 32-bits of the register can't be undefined for this sequence.
3201 LHS
= zeroExtendInputIfNeeded(LHS
);
3202 RHS
= zeroExtendInputIfNeeded(RHS
);
3204 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3206 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Subtract
,
3207 S
->getI32Imm(1, dl
), S
->getI32Imm(63,dl
)),
3209 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Shift
,
3210 S
->getI32Imm(-1, dl
)), 0);
3213 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3214 // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3215 std::swap(LHS
, RHS
);
3218 if (CmpInGPR
== ICGPR_NonExtIn
)
3220 // The upper 32-bits of the register can't be undefined for this sequence.
3221 LHS
= zeroExtendInputIfNeeded(LHS
);
3222 RHS
= zeroExtendInputIfNeeded(RHS
);
3224 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3225 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3226 Subtract
, S
->getI64Imm(63, dl
)), 0);
3231 /// Produces a zero-extended result of comparing two 64-bit values according to
3232 /// the passed condition code.
3234 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS
, SDValue RHS
,
3236 int64_t RHSValue
, SDLoc dl
) {
3237 if (CmpInGPR
== ICGPR_I32
|| CmpInGPR
== ICGPR_SextI32
||
3238 CmpInGPR
== ICGPR_ZextI32
|| CmpInGPR
== ICGPR_Sext
)
3240 bool IsRHSZero
= RHSValue
== 0;
3241 bool IsRHSOne
= RHSValue
== 1;
3242 bool IsRHSNegOne
= RHSValue
== -1LL;
3244 default: return SDValue();
3246 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3247 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3248 SDValue Xor
= IsRHSZero
? LHS
:
3249 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3251 SDValue(CurDAG
->getMachineNode(PPC::CNTLZD
, dl
, MVT::i64
, Xor
), 0);
3252 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Clz
,
3253 S
->getI64Imm(58, dl
),
3254 S
->getI64Imm(63, dl
)), 0);
3257 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3258 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3259 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3260 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3261 SDValue Xor
= IsRHSZero
? LHS
:
3262 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3264 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3265 Xor
, S
->getI32Imm(~0U, dl
)), 0);
3266 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, AC
,
3267 Xor
, AC
.getValue(1)), 0);
3270 // {subc.reg, subc.CA} = (subcarry %a, %b)
3271 // (zext (setcc %a, %b, setge)) ->
3272 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3273 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3275 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3276 std::swap(LHS
, RHS
);
3277 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3278 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3282 // {subc.reg, subc.CA} = (subcarry %b, %a)
3283 // (zext (setcc %a, %b, setle)) ->
3284 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3285 // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3287 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3289 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3290 S
->getI64Imm(1, dl
),
3291 S
->getI64Imm(63, dl
)), 0);
3293 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, RHS
,
3294 S
->getI64Imm(63, dl
)), 0);
3295 SDValue SubtractCarry
=
3296 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3298 return SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3299 ShiftR
, ShiftL
, SubtractCarry
), 0);
3302 // {subc.reg, subc.CA} = (subcarry %b, %a)
3303 // (zext (setcc %a, %b, setgt)) ->
3304 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3305 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3307 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3310 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3311 S
->getI64Imm(~0ULL, dl
)), 0);
3313 SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
, Addi
, LHS
), 0);
3314 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Nor
,
3315 S
->getI64Imm(1, dl
),
3316 S
->getI64Imm(63, dl
)), 0);
3318 std::swap(LHS
, RHS
);
3319 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3320 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3321 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3325 // {subc.reg, subc.CA} = (subcarry %a, %b)
3326 // (zext (setcc %a, %b, setlt)) ->
3327 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3328 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3330 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3332 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3333 S
->getI64Imm(1, dl
),
3334 S
->getI64Imm(63, dl
)), 0);
3336 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3337 LHS
, S
->getI64Imm(63, dl
)), 0);
3339 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3340 RHS
, S
->getI64Imm(1, dl
),
3341 S
->getI64Imm(63, dl
)), 0);
3342 SDValue SUBFC8Carry
=
3343 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3346 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3347 SRDINode
, SRADINode
, SUBFC8Carry
), 0);
3348 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3349 ADDE8Node
, S
->getI64Imm(1, dl
)), 0);
3352 // {subc.reg, subc.CA} = (subcarry %a, %b)
3353 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3354 std::swap(LHS
, RHS
);
3357 // {subc.reg, subc.CA} = (subcarry %b, %a)
3358 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3359 SDValue SUBFC8Carry
=
3360 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3362 SDValue SUBFE8Node
=
3363 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, MVT::Glue
,
3364 LHS
, LHS
, SUBFC8Carry
), 0);
3365 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
,
3366 SUBFE8Node
, S
->getI64Imm(1, dl
)), 0);
3369 // {subc.reg, subc.CA} = (subcarry %b, %a)
3370 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3371 std::swap(LHS
, RHS
);
3374 // {subc.reg, subc.CA} = (subcarry %a, %b)
3375 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3376 SDValue SubtractCarry
=
3377 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3380 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
,
3381 LHS
, LHS
, SubtractCarry
), 0);
3382 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
,
3388 /// Produces a sign-extended result of comparing two 64-bit values according to
3389 /// the passed condition code.
3391 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS
, SDValue RHS
,
3393 int64_t RHSValue
, SDLoc dl
) {
3394 if (CmpInGPR
== ICGPR_I32
|| CmpInGPR
== ICGPR_SextI32
||
3395 CmpInGPR
== ICGPR_ZextI32
|| CmpInGPR
== ICGPR_Zext
)
3397 bool IsRHSZero
= RHSValue
== 0;
3398 bool IsRHSOne
= RHSValue
== 1;
3399 bool IsRHSNegOne
= RHSValue
== -1LL;
3401 default: return SDValue();
3403 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3404 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3405 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3406 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3407 SDValue AddInput
= IsRHSZero
? LHS
:
3408 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3410 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3411 AddInput
, S
->getI32Imm(~0U, dl
)), 0);
3412 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, Addic
,
3413 Addic
, Addic
.getValue(1)), 0);
3416 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3417 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3418 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3419 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3420 SDValue Xor
= IsRHSZero
? LHS
:
3421 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3423 SDValue(CurDAG
->getMachineNode(PPC::SUBFIC8
, dl
, MVT::i64
, MVT::Glue
,
3424 Xor
, S
->getI32Imm(0, dl
)), 0);
3425 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, SC
,
3426 SC
, SC
.getValue(1)), 0);
3429 // {subc.reg, subc.CA} = (subcarry %a, %b)
3430 // (sext (setcc %a, %b, setge)) ->
3431 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3432 // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3434 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3435 std::swap(LHS
, RHS
);
3436 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3437 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3441 // {subc.reg, subc.CA} = (subcarry %b, %a)
3442 // (sext (setcc %a, %b, setle)) ->
3443 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3444 // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3446 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3448 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, RHS
,
3449 S
->getI64Imm(63, dl
)), 0);
3451 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3452 S
->getI64Imm(1, dl
),
3453 S
->getI64Imm(63, dl
)), 0);
3454 SDValue SubtractCarry
=
3455 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3458 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3459 ShiftR
, ShiftL
, SubtractCarry
), 0);
3460 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, Adde
), 0);
3463 // {subc.reg, subc.CA} = (subcarry %b, %a)
3464 // (sext (setcc %a, %b, setgt)) ->
3465 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3466 // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3468 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3471 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3472 S
->getI64Imm(-1, dl
)), 0);
3474 SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
, Add
, LHS
), 0);
3475 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, Nor
,
3476 S
->getI64Imm(63, dl
)), 0);
3478 std::swap(LHS
, RHS
);
3479 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3480 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3481 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3485 // {subc.reg, subc.CA} = (subcarry %a, %b)
3486 // (sext (setcc %a, %b, setlt)) ->
3487 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3488 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3490 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3492 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, LHS
,
3493 S
->getI64Imm(63, dl
)), 0);
3496 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3497 LHS
, S
->getI64Imm(63, dl
)), 0);
3499 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3500 RHS
, S
->getI64Imm(1, dl
),
3501 S
->getI64Imm(63, dl
)), 0);
3502 SDValue SUBFC8Carry
=
3503 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3506 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
,
3507 SRDINode
, SRADINode
, SUBFC8Carry
), 0);
3509 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3510 ADDE8Node
, S
->getI64Imm(1, dl
)), 0);
3511 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
,
3515 // {subc.reg, subc.CA} = (subcarry %a, %b)
3516 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3517 std::swap(LHS
, RHS
);
3520 // {subc.reg, subc.CA} = (subcarry %b, %a)
3521 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3522 SDValue SubtractCarry
=
3523 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3526 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, MVT::Glue
, LHS
,
3527 LHS
, SubtractCarry
), 0);
3528 return SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
,
3529 ExtSub
, ExtSub
), 0);
3532 // {subc.reg, subc.CA} = (subcarry %b, %a)
3533 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3534 std::swap(LHS
, RHS
);
3537 // {subc.reg, subc.CA} = (subcarry %a, %b)
3538 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3540 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3542 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
,
3543 LHS
, LHS
, SubCarry
), 0);
3548 /// Do all uses of this SDValue need the result in a GPR?
3549 /// This is meant to be used on values that have type i1 since
3550 /// it is somewhat meaningless to ask if values of other types
3551 /// should be kept in GPR's.
3552 static bool allUsesExtend(SDValue Compare
, SelectionDAG
*CurDAG
) {
3553 assert(Compare
.getOpcode() == ISD::SETCC
&&
3554 "An ISD::SETCC node required here.");
3556 // For values that have a single use, the caller should obviously already have
3557 // checked if that use is an extending use. We check the other uses here.
3558 if (Compare
.hasOneUse())
3560 // We want the value in a GPR if it is being extended, used for a select, or
3561 // used in logical operations.
3562 for (auto CompareUse
: Compare
.getNode()->uses())
3563 if (CompareUse
->getOpcode() != ISD::SIGN_EXTEND
&&
3564 CompareUse
->getOpcode() != ISD::ZERO_EXTEND
&&
3565 CompareUse
->getOpcode() != ISD::SELECT
&&
3566 !isLogicOp(CompareUse
->getOpcode())) {
3567 OmittedForNonExtendUses
++;
3573 /// Returns an equivalent of a SETCC node but with the result the same width as
3574 /// the inputs. This can also be used for SELECT_CC if either the true or false
3575 /// values is a power of two while the other is zero.
3576 SDValue
IntegerCompareEliminator::getSETCCInGPR(SDValue Compare
,
3577 SetccInGPROpts ConvOpts
) {
3578 assert((Compare
.getOpcode() == ISD::SETCC
||
3579 Compare
.getOpcode() == ISD::SELECT_CC
) &&
3580 "An ISD::SETCC node required here.");
3582 // Don't convert this comparison to a GPR sequence because there are uses
3583 // of the i1 result (i.e. uses that require the result in the CR).
3584 if ((Compare
.getOpcode() == ISD::SETCC
) && !allUsesExtend(Compare
, CurDAG
))
3587 SDValue LHS
= Compare
.getOperand(0);
3588 SDValue RHS
= Compare
.getOperand(1);
3590 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3591 int CCOpNum
= Compare
.getOpcode() == ISD::SELECT_CC
? 4 : 2;
3593 cast
<CondCodeSDNode
>(Compare
.getOperand(CCOpNum
))->get();
3594 EVT InputVT
= LHS
.getValueType();
3595 if (InputVT
!= MVT::i32
&& InputVT
!= MVT::i64
)
3598 if (ConvOpts
== SetccInGPROpts::ZExtInvert
||
3599 ConvOpts
== SetccInGPROpts::SExtInvert
)
3600 CC
= ISD::getSetCCInverse(CC
, true);
3602 bool Inputs32Bit
= InputVT
== MVT::i32
;
3605 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3606 int64_t RHSValue
= RHSConst
? RHSConst
->getSExtValue() : INT64_MAX
;
3607 bool IsSext
= ConvOpts
== SetccInGPROpts::SExtOrig
||
3608 ConvOpts
== SetccInGPROpts::SExtInvert
;
3610 if (IsSext
&& Inputs32Bit
)
3611 return get32BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3612 else if (Inputs32Bit
)
3613 return get32BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3615 return get64BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3616 return get64BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3619 } // end anonymous namespace
3621 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode
*N
) {
3622 if (N
->getValueType(0) != MVT::i32
&&
3623 N
->getValueType(0) != MVT::i64
)
3626 // This optimization will emit code that assumes 64-bit registers
3627 // so we don't want to run it in 32-bit mode. Also don't run it
3628 // on functions that are not to be optimized.
3629 if (TM
.getOptLevel() == CodeGenOpt::None
|| !TM
.isPPC64())
3632 switch (N
->getOpcode()) {
3634 case ISD::ZERO_EXTEND
:
3635 case ISD::SIGN_EXTEND
:
3639 IntegerCompareEliminator
ICmpElim(CurDAG
, this);
3640 if (SDNode
*New
= ICmpElim
.Select(N
)) {
3641 ReplaceNode(N
, New
);
3649 bool PPCDAGToDAGISel::tryBitPermutation(SDNode
*N
) {
3650 if (N
->getValueType(0) != MVT::i32
&&
3651 N
->getValueType(0) != MVT::i64
)
3654 if (!UseBitPermRewriter
)
3657 switch (N
->getOpcode()) {
3664 BitPermutationSelector
BPS(CurDAG
);
3665 if (SDNode
*New
= BPS
.Select(N
)) {
3666 ReplaceNode(N
, New
);
3676 /// SelectCC - Select a comparison of the specified values with the specified
3677 /// condition code, returning the CR# of the expression.
3678 SDValue
PPCDAGToDAGISel::SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
3680 // Always select the LHS.
3683 if (LHS
.getValueType() == MVT::i32
) {
3685 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
3686 if (isInt32Immediate(RHS
, Imm
)) {
3687 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3688 if (isUInt
<16>(Imm
))
3689 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
3690 getI32Imm(Imm
& 0xFFFF, dl
)),
3692 // If this is a 16-bit signed immediate, fold it.
3693 if (isInt
<16>((int)Imm
))
3694 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
3695 getI32Imm(Imm
& 0xFFFF, dl
)),
3698 // For non-equality comparisons, the default code would materialize the
3699 // constant, then compare against it, like this:
3701 // ori r2, r2, 22136
3703 // Since we are just comparing for equality, we can emit this instead:
3704 // xoris r0,r3,0x1234
3705 // cmplwi cr0,r0,0x5678
3707 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS
, dl
, MVT::i32
, LHS
,
3708 getI32Imm(Imm
>> 16, dl
)), 0);
3709 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, Xor
,
3710 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
3713 } else if (ISD::isUnsignedIntSetCC(CC
)) {
3714 if (isInt32Immediate(RHS
, Imm
) && isUInt
<16>(Imm
))
3715 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
3716 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
3720 if (isIntS16Immediate(RHS
, SImm
))
3721 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
3722 getI32Imm((int)SImm
& 0xFFFF,
3727 } else if (LHS
.getValueType() == MVT::i64
) {
3729 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
3730 if (isInt64Immediate(RHS
.getNode(), Imm
)) {
3731 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3732 if (isUInt
<16>(Imm
))
3733 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
3734 getI32Imm(Imm
& 0xFFFF, dl
)),
3736 // If this is a 16-bit signed immediate, fold it.
3738 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
3739 getI32Imm(Imm
& 0xFFFF, dl
)),
3742 // For non-equality comparisons, the default code would materialize the
3743 // constant, then compare against it, like this:
3745 // ori r2, r2, 22136
3747 // Since we are just comparing for equality, we can emit this instead:
3748 // xoris r0,r3,0x1234
3749 // cmpldi cr0,r0,0x5678
3751 if (isUInt
<32>(Imm
)) {
3752 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS8
, dl
, MVT::i64
, LHS
,
3753 getI64Imm(Imm
>> 16, dl
)), 0);
3754 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, Xor
,
3755 getI64Imm(Imm
& 0xFFFF, dl
)),
3760 } else if (ISD::isUnsignedIntSetCC(CC
)) {
3761 if (isInt64Immediate(RHS
.getNode(), Imm
) && isUInt
<16>(Imm
))
3762 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
3763 getI64Imm(Imm
& 0xFFFF, dl
)), 0);
3767 if (isIntS16Immediate(RHS
, SImm
))
3768 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
3769 getI64Imm(SImm
& 0xFFFF, dl
)),
3773 } else if (LHS
.getValueType() == MVT::f32
) {
3774 if (PPCSubTarget
->hasSPE()) {
3779 Opc
= PPC::EFSCMPEQ
;
3787 Opc
= PPC::EFSCMPLT
;
3795 Opc
= PPC::EFSCMPGT
;
3800 } else if (LHS
.getValueType() == MVT::f64
) {
3801 if (PPCSubTarget
->hasSPE()) {
3806 Opc
= PPC::EFDCMPEQ
;
3814 Opc
= PPC::EFDCMPLT
;
3822 Opc
= PPC::EFDCMPGT
;
3826 Opc
= PPCSubTarget
->hasVSX() ? PPC::XSCMPUDP
: PPC::FCMPUD
;
3828 assert(LHS
.getValueType() == MVT::f128
&& "Unknown vt!");
3829 assert(PPCSubTarget
->hasVSX() && "__float128 requires VSX");
3830 Opc
= PPC::XSCMPUQP
;
3832 return SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, LHS
, RHS
), 0);
/// getPredicateForSetCC - Translate an ISD condition code into the PPC
/// predicate (PPC::PRED_*) returned for it.
///
/// \param CC the condition code to translate.
/// \returns the matching PPC::Predicate; condition codes that reach one of
///          the llvm_unreachable calls are treated as programming errors.
///
/// NOTE(review): the embedded line numbers skip, so this listing omits some
/// source lines. The switch header, the case labels guarding the first
/// llvm_unreachable, and any additional labels that share the returns below
/// are not visible from here - confirm against the full file.
3835 static PPC::Predicate
getPredicateForSetCC(ISD::CondCode CC
) {
3841 llvm_unreachable("Should be lowered by legalize!");
3842 default: llvm_unreachable("Unknown condition!");
3844 case ISD::SETEQ
: return PPC::PRED_EQ
;
3846 case ISD::SETNE
: return PPC::PRED_NE
;
3848 case ISD::SETLT
: return PPC::PRED_LT
;
3850 case ISD::SETLE
: return PPC::PRED_LE
;
3852 case ISD::SETGT
: return PPC::PRED_GT
;
3854 case ISD::SETGE
: return PPC::PRED_GE
;
// Ordered / unordered map onto the "not unordered" / "unordered" predicates.
3855 case ISD::SETO
: return PPC::PRED_NU
;
3856 case ISD::SETUO
: return PPC::PRED_UN
;
3857 // These two are invalid for floating point. Assume we have int.
// The unsigned codes reuse the LT/GT predicates that the signed codes return.
3858 case ISD::SETULT
: return PPC::PRED_LT
;
3859 case ISD::SETUGT
: return PPC::PRED_GT
;
3863 /// getCRIdxForSetCC - Return the index of the condition register field
3864 /// associated with the SetCC condition, and whether or not the field is
3865 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
///
/// \param CC     the condition code to look up.
/// \param Invert out-parameter; the visible cases only ever assign 'true' to
///               it (for the conditions expressed as the negation of a bit).
/// \returns the bit index 0..3 as spelled out in the per-case comments below.
///
/// NOTE(review): the embedded line numbers skip, so some source lines (the
/// switch header, further case labels, and possibly an initial write to
/// Invert) are not visible in this listing - confirm against the full file.
3866 static unsigned getCRIdxForSetCC(ISD::CondCode CC
, bool &Invert
) {
3869 default: llvm_unreachable("Unknown condition!");
// Conditions that read a CR bit directly.
3871 case ISD::SETLT
: return 0; // Bit #0 = SETOLT
3873 case ISD::SETGT
: return 1; // Bit #1 = SETOGT
3875 case ISD::SETEQ
: return 2; // Bit #2 = SETOEQ
3876 case ISD::SETUO
: return 3; // Bit #3 = SETUO
// Conditions expressed as the complement of a CR bit: same index, Invert set.
3878 case ISD::SETGE
: Invert
= true; return 0; // !Bit #0 = SETUGE
3880 case ISD::SETLE
: Invert
= true; return 1; // !Bit #1 = SETULE
3882 case ISD::SETNE
: Invert
= true; return 2; // !Bit #2 = SETUNE
3883 case ISD::SETO
: Invert
= true; return 3; // !Bit #3 = SETO
3888 llvm_unreachable("Invalid branch code: should be expanded by legalize");
3889 // These are invalid for floating point. Assume integer.
3890 case ISD::SETULT
: return 0;
3891 case ISD::SETUGT
: return 1;
3895 // getVCmpInst: return the vector compare instruction for the specified
3896 // vector type and condition code. Since this is for altivec specific code,
3897 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
//
// Parameters:
//   VecVT  - vector type being compared; floating-point and integer vectors
//            take separate paths below.
//   CC     - requested condition; rewritten locally when the condition is
//            implemented by swapping operands or negating the result.
//   HasVSX - selects the XVCMP*SP opcode instead of the VCMP*FP one for v4f32.
//   Swap   - out: set to true when the caller must exchange the operands.
//   Negate - out: set to true when the caller must invert the compare result.
// Returns the PPC opcode of the compare instruction to emit.
//
// NOTE(review): the embedded line numbers skip, so the switch headers, the
// case labels in front of the opcode-selection groups, and any initialisation
// of Swap/Negate are not visible in this listing - confirm against the full
// file. v2f64 is also handled below although the comment above does not list it.
3898 static unsigned int getVCmpInst(MVT VecVT
, ISD::CondCode CC
,
3899 bool HasVSX
, bool &Swap
, bool &Negate
) {
3903 if (VecVT
.isFloatingPoint()) {
3904 /* Handle some cases by swapping input operands. */
3906 case ISD::SETLE
: CC
= ISD::SETGE
; Swap
= true; break;
3907 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
3908 case ISD::SETOLE
: CC
= ISD::SETOGE
; Swap
= true; break;
3909 case ISD::SETOLT
: CC
= ISD::SETOGT
; Swap
= true; break;
3910 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
3911 case ISD::SETUGT
: CC
= ISD::SETULT
; Swap
= true; break;
3914 /* Handle some cases by negating the result. */
3916 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
3917 case ISD::SETUNE
: CC
= ISD::SETOEQ
; Negate
= true; break;
3918 case ISD::SETULE
: CC
= ISD::SETOGT
; Negate
= true; break;
3919 case ISD::SETULT
: CC
= ISD::SETOGE
; Negate
= true; break;
3922 /* We have instructions implementing the remaining cases. */
// Equality compares (opcode names *CMPEQ*); v2f64 always uses the XV form.
3926 if (VecVT
== MVT::v4f32
)
3927 return HasVSX
? PPC::XVCMPEQSP
: PPC::VCMPEQFP
;
3928 else if (VecVT
== MVT::v2f64
)
3929 return PPC::XVCMPEQDP
;
// Greater-than compares (opcode names *CMPGT*).
3933 if (VecVT
== MVT::v4f32
)
3934 return HasVSX
? PPC::XVCMPGTSP
: PPC::VCMPGTFP
;
3935 else if (VecVT
== MVT::v2f64
)
3936 return PPC::XVCMPGTDP
;
// Greater-or-equal compares (opcode names *CMPGE*).
3940 if (VecVT
== MVT::v4f32
)
3941 return HasVSX
? PPC::XVCMPGESP
: PPC::VCMPGEFP
;
3942 else if (VecVT
== MVT::v2f64
)
3943 return PPC::XVCMPGEDP
;
3948 llvm_unreachable("Invalid floating-point vector compare condition");
3950 /* Handle some cases by swapping input operands. */
3952 case ISD::SETGE
: CC
= ISD::SETLE
; Swap
= true; break;
3953 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
3954 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
3955 case ISD::SETULT
: CC
= ISD::SETUGT
; Swap
= true; break;
3958 /* Handle some cases by negating the result. */
3960 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
3961 case ISD::SETUNE
: CC
= ISD::SETUEQ
; Negate
= true; break;
3962 case ISD::SETLE
: CC
= ISD::SETGT
; Negate
= true; break;
3963 case ISD::SETULE
: CC
= ISD::SETUGT
; Negate
= true; break;
3966 /* We have instructions implementing the remaining cases. */
// Integer equality: one VCMPEQU* opcode per element width.
3970 if (VecVT
== MVT::v16i8
)
3971 return PPC::VCMPEQUB
;
3972 else if (VecVT
== MVT::v8i16
)
3973 return PPC::VCMPEQUH
;
3974 else if (VecVT
== MVT::v4i32
)
3975 return PPC::VCMPEQUW
;
3976 else if (VecVT
== MVT::v2i64
)
3977 return PPC::VCMPEQUD
;
// Signed greater-than: VCMPGTS* opcodes.
3980 if (VecVT
== MVT::v16i8
)
3981 return PPC::VCMPGTSB
;
3982 else if (VecVT
== MVT::v8i16
)
3983 return PPC::VCMPGTSH
;
3984 else if (VecVT
== MVT::v4i32
)
3985 return PPC::VCMPGTSW
;
3986 else if (VecVT
== MVT::v2i64
)
3987 return PPC::VCMPGTSD
;
// Unsigned greater-than: VCMPGTU* opcodes.
3990 if (VecVT
== MVT::v16i8
)
3991 return PPC::VCMPGTUB
;
3992 else if (VecVT
== MVT::v8i16
)
3993 return PPC::VCMPGTUH
;
3994 else if (VecVT
== MVT::v4i32
)
3995 return PPC::VCMPGTUW
;
3996 else if (VecVT
== MVT::v2i64
)
3997 return PPC::VCMPGTUD
;
4002 llvm_unreachable("Invalid integer vector compare condition");
/// trySETCC - Custom selection for a SETCC node \p N (the condition code is
/// read from operand 2). Three strategies are visible below:
///   1. when CR bits are not in use and operand 1 is a 32-bit immediate,
///      short GPR-only sequences for comparisons against 0 and -1;
///   2. for vector operands, a vector compare chosen by getVCmpInst, with the
///      operands swapped and/or the result passed through a NOR as requested;
///   3. otherwise a compare produced by SelectCC whose CR field is copied to
///      CR7, read with MFOCRF, and reduced to one bit with RLWINM (plus an
///      XORI by 1 on the final path).
///
/// NOTE(review): the embedded line numbers skip, so this listing omits some
/// source lines - notably the switch headers / case labels that choose between
/// the sequences, several local declarations, and every return statement. The
/// meaning of the bool result therefore cannot be confirmed from this view.
4006 bool PPCDAGToDAGISel::trySETCC(SDNode
*N
) {
4009 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
4011 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
4012 bool isPPC64
= (PtrVT
== MVT::i64
);
4014 if (!PPCSubTarget
->useCRBits() &&
4015 isInt32Immediate(N
->getOperand(1), Imm
)) {
4016 // We can codegen setcc op, imm very efficiently compared to a brcond.
4017 // Check for those cases here.
4020 SDValue Op
= N
->getOperand(0);
// NOTE(review): the case labels selecting each of the sequences below are not
// visible in this listing.
4024 Op
= SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Op
), 0);
4025 SDValue Ops
[] = { Op
, getI32Imm(27, dl
), getI32Imm(5, dl
),
4026 getI32Imm(31, dl
) };
4027 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
// ADDIC produces a glue result that is consumed by the SUBFE below.
4033 SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4034 Op
, getI32Imm(~0U, dl
)), 0);
4035 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, AD
, Op
, AD
.getValue(1));
4039 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4040 getI32Imm(31, dl
) };
4041 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4046 SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Op
), 0);
4047 T
= SDValue(CurDAG
->getMachineNode(PPC::ANDC
, dl
, MVT::i32
, T
, Op
), 0);
4048 SDValue Ops
[] = { T
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4049 getI32Imm(31, dl
) };
4050 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4054 } else if (Imm
== ~0U) { // setcc op, -1
4055 SDValue Op
= N
->getOperand(0);
4060 Op
= SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4061 Op
, getI32Imm(1, dl
)), 0);
4062 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
,
4063 SDValue(CurDAG
->getMachineNode(PPC::LI
, dl
,
4066 0), Op
.getValue(1));
4070 Op
= SDValue(CurDAG
->getMachineNode(PPC::NOR
, dl
, MVT::i32
, Op
, Op
), 0);
4071 SDNode
*AD
= CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4072 Op
, getI32Imm(~0U, dl
));
4073 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(AD
, 0), Op
,
4078 SDValue AD
= SDValue(CurDAG
->getMachineNode(PPC::ADDI
, dl
, MVT::i32
, Op
,
4079 getI32Imm(1, dl
)), 0);
4080 SDValue AN
= SDValue(CurDAG
->getMachineNode(PPC::AND
, dl
, MVT::i32
, AD
,
4082 SDValue Ops
[] = { AN
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4083 getI32Imm(31, dl
) };
4084 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4088 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4089 getI32Imm(31, dl
) };
4090 Op
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
4091 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Op
, getI32Imm(1, dl
));
4098 SDValue LHS
= N
->getOperand(0);
4099 SDValue RHS
= N
->getOperand(1);
4101 // Altivec Vector compare instructions do not set any CR register by default and
4102 // vector compare operations return the same type as the operands.
4103 if (LHS
.getValueType().isVector()) {
4104 if (PPCSubTarget
->hasQPX() || PPCSubTarget
->hasSPE())
4107 EVT VecVT
= LHS
.getValueType();
// getVCmpInst reports through Swap / Negate whether the operands must be
// exchanged or the result inverted to implement CC.
4109 unsigned int VCmpInst
= getVCmpInst(VecVT
.getSimpleVT(), CC
,
4110 PPCSubTarget
->hasVSX(), Swap
, Negate
);
4112 std::swap(LHS
, RHS
);
4114 EVT ResVT
= VecVT
.changeVectorElementTypeToInteger();
// Path that wraps the compare in a NOR (XXLNOR with VSX, VNOR otherwise).
4116 SDValue
VCmp(CurDAG
->getMachineNode(VCmpInst
, dl
, ResVT
, LHS
, RHS
), 0);
4117 CurDAG
->SelectNodeTo(N
, PPCSubTarget
->hasVSX() ? PPC::XXLNOR
: PPC::VNOR
,
4122 CurDAG
->SelectNodeTo(N
, VCmpInst
, ResVT
, LHS
, RHS
);
4126 if (PPCSubTarget
->useCRBits())
// Generic path: build the compare with SelectCC and pick the CR bit (and
// whether it is inverted) with getCRIdxForSetCC.
4130 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
4131 SDValue CCReg
= SelectCC(LHS
, RHS
, CC
, dl
);
4134 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4135 // The correct compare instruction is already set by SelectCC()
4136 if (PPCSubTarget
->hasSPE() && LHS
.getValueType().isFloatingPoint()) {
4140 // Force the ccreg into CR7.
4141 SDValue CR7Reg
= CurDAG
->getRegister(PPC::CR7
, MVT::i32
);
4143 SDValue
InFlag(nullptr, 0); // Null incoming flag value.
4144 CCReg
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, CR7Reg
, CCReg
,
4145 InFlag
).getValue(1);
4147 IntCR
= SDValue(CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
, CR7Reg
,
// Rotate amount derived from Idx; mask begin/end of 31 keeps a single bit.
4150 SDValue Ops
[] = { IntCR
, getI32Imm((32 - (3 - Idx
)) & 31, dl
),
4151 getI32Imm(31, dl
), getI32Imm(31, dl
) };
4153 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4157 // Get the specified bit.
4159 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
4160 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Tmp
, getI32Imm(1, dl
));
4164 /// Does this node represent a load/store node whose address can be represented
4165 /// with a register plus an immediate that's a multiple of \p Val:
///
/// \param N   node to inspect; it is probed as a LoadSDNode and as a
///            StoreSDNode, and the address is taken from operand 1 of the
///            load or operand 2 of the store.
/// \param Val required divisor for the frame-slot alignment and for the
///            immediate offset.
///
/// NOTE(review): the embedded line numbers skip, so the null checks on
/// LDN/STN, the declarations of AddrOp/Imm, and several return statements are
/// not visible in this listing - confirm against the full file.
4166 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const {
4167 LoadSDNode
*LDN
= dyn_cast
<LoadSDNode
>(N
);
4168 StoreSDNode
*STN
= dyn_cast
<StoreSDNode
>(N
);
4171 AddrOp
= LDN
->getOperand(1);
4173 AddrOp
= STN
->getOperand(2);
4175 // If the address points to a frame object or a frame object with an offset,
4176 // we need to check the object alignment.
4178 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(
4179 AddrOp
.getOpcode() == ISD::ADD
? AddrOp
.getOperand(0) :
4181 // If op0 is a frame index that is under aligned, we can't do it either,
4182 // because it is translated to r31 or r1 + slot + offset. We won't know the
4183 // slot number until the stack frame is finalized.
4184 const MachineFrameInfo
&MFI
= CurDAG
->getMachineFunction().getFrameInfo();
4185 unsigned SlotAlign
= MFI
.getObjectAlignment(FI
->getIndex());
4186 if ((SlotAlign
% Val
) != 0)
4189 // If we have an offset, we need further check on the offset.
4190 if (AddrOp
.getOpcode() != ISD::ADD
)
// For base + offset addresses the offset must be a signed 16-bit immediate
// that Val divides evenly.
4194 if (AddrOp
.getOpcode() == ISD::ADD
)
4195 return isIntS16Immediate(AddrOp
.getOperand(1), Imm
) && !(Imm
% Val
);
4197 // If the address comes from the outside, the offset will be zero.
4198 return AddrOp
.getOpcode() == ISD::CopyFromReg
;
/// transferMemOperands - Copy the memory operand of \p N onto \p Result.
///
/// \param N      source node; cast<MemSDNode> is applied to it, so it must be
///               a memory node.
/// \param Result destination node; cast<MachineSDNode> is applied to it, so it
///               must be a machine node. Its memory references are set to the
///               single operand taken from \p N.
4201 void PPCDAGToDAGISel::transferMemOperands(SDNode
*N
, SDNode
*Result
) {
4202 // Transfer memoperands.
4203 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
4204 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
// mayUseP9Setb - Check whether the SELECT_CC node N has one of the nested
// compare shapes listed in the comments below; the debug message at the end
// describes a match as "a node that can be lowered to a SETB".
//
//   N           - node to inspect; asserted to be an ISD::SELECT_CC with an
//                 i32 or i64 result.
//   CC          - condition code of N, compared against ISD::SETEQ below.
//   DAG         - not referenced in the lines visible in this listing.
//   NeedSwapOps - out: assigned below from the inner condition code, whether
//                 the inner operands are swapped relative to N's, and the
//                 constant 'true' result.
//   IsUnCmp     - out: consulted below together with the inner condition
//                 code. NOTE(review): the statement that sets it is not
//                 visible in this listing.
//
// NOTE(review): the embedded line numbers skip, so this listing omits some
// source lines - the null check on TrueConst, the switch over CC, and all the
// return statements among them. The exact accept/reject flow therefore cannot
// be confirmed from this view.
4207 static bool mayUseP9Setb(SDNode
*N
, const ISD::CondCode
&CC
, SelectionDAG
*DAG
,
4208 bool &NeedSwapOps
, bool &IsUnCmp
) {
4210 assert(N
->getOpcode() == ISD::SELECT_CC
&& "Expecting a SELECT_CC here.");
4212 SDValue LHS
= N
->getOperand(0);
4213 SDValue RHS
= N
->getOperand(1);
4214 SDValue TrueRes
= N
->getOperand(2);
4215 SDValue FalseRes
= N
->getOperand(3);
4216 ConstantSDNode
*TrueConst
= dyn_cast
<ConstantSDNode
>(TrueRes
);
4220 assert((N
->getSimpleValueType(0) == MVT::i64
||
4221 N
->getSimpleValueType(0) == MVT::i32
) &&
4222 "Expecting either i64 or i32 here.");
4224 // We are looking for any of:
4225 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4226 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4227 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4228 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4229 int64_t TrueResVal
= TrueConst
->getSExtValue();
// The 'true' constant must be -1, 0 or 1, and each value requires a specific
// opcode for the 'false' operand (zext, select_cc under seteq, or sext).
4230 if ((TrueResVal
< -1 || TrueResVal
> 1) ||
4231 (TrueResVal
== -1 && FalseRes
.getOpcode() != ISD::ZERO_EXTEND
) ||
4232 (TrueResVal
== 1 && FalseRes
.getOpcode() != ISD::SIGN_EXTEND
) ||
4234 (FalseRes
.getOpcode() != ISD::SELECT_CC
|| CC
!= ISD::SETEQ
)))
// Look through the extension (if any) to reach the inner setcc / select_cc.
4237 bool InnerIsSel
= FalseRes
.getOpcode() == ISD::SELECT_CC
;
4238 SDValue SetOrSelCC
= InnerIsSel
? FalseRes
: FalseRes
.getOperand(0);
4239 if (SetOrSelCC
.getOpcode() != ISD::SETCC
&&
4240 SetOrSelCC
.getOpcode() != ISD::SELECT_CC
)
4243 // Without this setb optimization, the outer SELECT_CC will be manually
4244 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4245 // transforms pseudo instruction to isel instruction. When there are more than
4246 // one use for result like zext/sext, with current optimization we only see
4247 // isel is replaced by setb but can't see any significant gain. Since
4248 // setb has longer latency than original isel, we should avoid this. Another
4249 // point is that setb requires comparison always kept, it can break the
4250 // opportunity to get the comparison away if we have in future.
4251 if (!SetOrSelCC
.hasOneUse() || (!InnerIsSel
&& !FalseRes
.hasOneUse()))
4254 SDValue InnerLHS
= SetOrSelCC
.getOperand(0);
4255 SDValue InnerRHS
= SetOrSelCC
.getOperand(1);
// The condition code is operand 4 of a select_cc and operand 2 of a setcc.
4256 ISD::CondCode InnerCC
=
4257 cast
<CondCodeSDNode
>(SetOrSelCC
.getOperand(InnerIsSel
? 4 : 2))->get();
4258 // If the inner comparison is a select_cc, make sure the true/false values are
4259 // 1/-1 and canonicalize it if needed.
4261 ConstantSDNode
*SelCCTrueConst
=
4262 dyn_cast
<ConstantSDNode
>(SetOrSelCC
.getOperand(2));
4263 ConstantSDNode
*SelCCFalseConst
=
4264 dyn_cast
<ConstantSDNode
>(SetOrSelCC
.getOperand(3));
4265 if (!SelCCTrueConst
|| !SelCCFalseConst
)
4267 int64_t SelCCTVal
= SelCCTrueConst
->getSExtValue();
4268 int64_t SelCCFVal
= SelCCFalseConst
->getSExtValue();
4269 // The values must be -1/1 (requiring a swap) or 1/-1.
4270 if (SelCCTVal
== -1 && SelCCFVal
== 1) {
4271 std::swap(InnerLHS
, InnerRHS
);
4272 } else if (SelCCTVal
!= 1 || SelCCFVal
!= -1)
4276 // Canonicalize unsigned case
4277 if (InnerCC
== ISD::SETULT
|| InnerCC
== ISD::SETUGT
) {
4279 InnerCC
= (InnerCC
== ISD::SETULT
) ? ISD::SETLT
: ISD::SETGT
;
// The inner compare must use the same two operands as N, in either order.
4282 bool InnerSwapped
= false;
4283 if (LHS
== InnerRHS
&& RHS
== InnerLHS
)
4284 InnerSwapped
= true;
4285 else if (LHS
!= InnerLHS
|| RHS
!= InnerRHS
)
4289 // (select_cc lhs, rhs, 0, \
4290 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4294 if (InnerCC
!= ISD::SETLT
&& InnerCC
!= ISD::SETGT
)
4296 NeedSwapOps
= (InnerCC
== ISD::SETGT
) ? InnerSwapped
: !InnerSwapped
;
4299 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4300 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4301 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4302 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4303 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4304 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4306 if (!IsUnCmp
&& InnerCC
!= ISD::SETNE
)
4311 if (InnerCC
== ISD::SETNE
|| (InnerCC
== ISD::SETGT
&& !InnerSwapped
) ||
4312 (InnerCC
== ISD::SETLT
&& InnerSwapped
))
4313 NeedSwapOps
= (TrueResVal
== 1);
4318 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4319 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4320 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4321 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4322 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4323 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4325 if (!IsUnCmp
&& InnerCC
!= ISD::SETNE
)
4330 if (InnerCC
== ISD::SETNE
|| (InnerCC
== ISD::SETLT
&& !InnerSwapped
) ||
4331 (InnerCC
== ISD::SETGT
&& InnerSwapped
))
4332 NeedSwapOps
= (TrueResVal
== -1);
4341 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4342 LLVM_DEBUG(N
->dump());
4347 // Select - Convert the specified operand from a target-independent to a
4348 // target-specific node if it hasn't already been changed.
4349 void PPCDAGToDAGISel::Select(SDNode
*N
) {
4351 if (N
->isMachineOpcode()) {
4353 return; // Already selected.
4356 // In case any misguided DAG-level optimizations form an ADD with a
4357 // TargetConstant operand, crash here instead of miscompiling (by selecting
4358 // an r+r add instead of some kind of r+i add).
4359 if (N
->getOpcode() == ISD::ADD
&&
4360 N
->getOperand(1).getOpcode() == ISD::TargetConstant
)
4361 llvm_unreachable("Invalid ADD with TargetConstant operand");
4363 // Try matching complex bit permutations before doing anything else.
4364 if (tryBitPermutation(N
))
4367 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4368 if (tryIntCompareInGPR(N
))
4371 switch (N
->getOpcode()) {
4375 if (N
->getValueType(0) == MVT::i64
) {
4376 ReplaceNode(N
, selectI64Imm(CurDAG
, N
));
4385 // These nodes will be transformed into GETtlsADDR32 node, which
4386 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
4387 case PPCISD::ADDI_TLSLD_L_ADDR
:
4388 case PPCISD::ADDI_TLSGD_L_ADDR
: {
4389 const Module
*Mod
= MF
->getFunction().getParent();
4390 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) != MVT::i32
||
4391 !PPCSubTarget
->isSecurePlt() || !PPCSubTarget
->isTargetELF() ||
4392 Mod
->getPICLevel() == PICLevel::SmallPIC
)
4394 // Attach global base pointer on GETtlsADDR32 node in order to
4395 // generate secure plt code for TLS symbols.
4398 case PPCISD::CALL
: {
4399 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) != MVT::i32
||
4400 !TM
.isPositionIndependent() || !PPCSubTarget
->isSecurePlt() ||
4401 !PPCSubTarget
->isTargetELF())
4404 SDValue Op
= N
->getOperand(1);
4406 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Op
)) {
4407 if (GA
->getTargetFlags() == PPCII::MO_PLT
)
4410 else if (ExternalSymbolSDNode
*ES
= dyn_cast
<ExternalSymbolSDNode
>(Op
)) {
4411 if (ES
->getTargetFlags() == PPCII::MO_PLT
)
4417 case PPCISD::GlobalBaseReg
:
4418 ReplaceNode(N
, getGlobalBaseReg());
4421 case ISD::FrameIndex
:
4422 selectFrameIndex(N
, N
);
4425 case PPCISD::MFOCRF
: {
4426 SDValue InFlag
= N
->getOperand(1);
4427 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
,
4428 N
->getOperand(0), InFlag
));
4432 case PPCISD::READ_TIME_BASE
:
4433 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ReadTB
, dl
, MVT::i32
, MVT::i32
,
4434 MVT::Other
, N
->getOperand(0)));
4437 case PPCISD::SRA_ADDZE
: {
4438 SDValue N0
= N
->getOperand(0);
4440 CurDAG
->getTargetConstant(*cast
<ConstantSDNode
>(N
->getOperand(1))->
4441 getConstantIntValue(), dl
,
4442 N
->getValueType(0));
4443 if (N
->getValueType(0) == MVT::i64
) {
4445 CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, MVT::Glue
,
4447 CurDAG
->SelectNodeTo(N
, PPC::ADDZE8
, MVT::i64
, SDValue(Op
, 0),
4451 assert(N
->getValueType(0) == MVT::i32
&&
4452 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4454 CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, MVT::Glue
,
4456 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
, SDValue(Op
, 0),
4463 // Change TLS initial-exec D-form stores to X-form stores.
4464 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
4465 if (EnableTLSOpt
&& PPCSubTarget
->isELFv2ABI() &&
4466 ST
->getAddressingMode() != ISD::PRE_INC
)
4467 if (tryTLSXFormStore(ST
))
4472 // Handle preincrement loads.
4473 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
4474 EVT LoadedVT
= LD
->getMemoryVT();
4476 // Normal loads are handled by code generated from the .td file.
4477 if (LD
->getAddressingMode() != ISD::PRE_INC
) {
4478 // Change TLS initial-exec D-form loads to X-form loads.
4479 if (EnableTLSOpt
&& PPCSubTarget
->isELFv2ABI())
4480 if (tryTLSXFormLoad(LD
))
4485 SDValue Offset
= LD
->getOffset();
4486 if (Offset
.getOpcode() == ISD::TargetConstant
||
4487 Offset
.getOpcode() == ISD::TargetGlobalAddress
) {
4490 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
4491 if (LD
->getValueType(0) != MVT::i64
) {
4492 // Handle PPC32 integer and normal FP loads.
4493 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4494 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4495 default: llvm_unreachable("Invalid PPC load type!");
4496 case MVT::f64
: Opcode
= PPC::LFDU
; break;
4497 case MVT::f32
: Opcode
= PPC::LFSU
; break;
4498 case MVT::i32
: Opcode
= PPC::LWZU
; break;
4499 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU
: PPC::LHZU
; break;
4501 case MVT::i8
: Opcode
= PPC::LBZU
; break;
4504 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
4505 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4506 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4507 default: llvm_unreachable("Invalid PPC load type!");
4508 case MVT::i64
: Opcode
= PPC::LDU
; break;
4509 case MVT::i32
: Opcode
= PPC::LWZU8
; break;
4510 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU8
: PPC::LHZU8
; break;
4512 case MVT::i8
: Opcode
= PPC::LBZU8
; break;
4516 SDValue Chain
= LD
->getChain();
4517 SDValue Base
= LD
->getBasePtr();
4518 SDValue Ops
[] = { Offset
, Base
, Chain
};
4519 SDNode
*MN
= CurDAG
->getMachineNode(
4520 Opcode
, dl
, LD
->getValueType(0),
4521 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
4522 transferMemOperands(N
, MN
);
4527 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
4528 if (LD
->getValueType(0) != MVT::i64
) {
4529 // Handle PPC32 integer and normal FP loads.
4530 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
4531 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4532 default: llvm_unreachable("Invalid PPC load type!");
4533 case MVT::v4f64
: Opcode
= PPC::QVLFDUX
; break; // QPX
4534 case MVT::v4f32
: Opcode
= PPC::QVLFSUX
; break; // QPX
4535 case MVT::f64
: Opcode
= PPC::LFDUX
; break;
4536 case MVT::f32
: Opcode
= PPC::LFSUX
; break;
4537 case MVT::i32
: Opcode
= PPC::LWZUX
; break;
4538 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX
: PPC::LHZUX
; break;
4540 case MVT::i8
: Opcode
= PPC::LBZUX
; break;
4543 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
4544 assert((!isSExt
|| LoadedVT
== MVT::i16
|| LoadedVT
== MVT::i32
) &&
4545 "Invalid sext update load");
4546 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
4547 default: llvm_unreachable("Invalid PPC load type!");
4548 case MVT::i64
: Opcode
= PPC::LDUX
; break;
4549 case MVT::i32
: Opcode
= isSExt
? PPC::LWAUX
: PPC::LWZUX8
; break;
4550 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX8
: PPC::LHZUX8
; break;
4552 case MVT::i8
: Opcode
= PPC::LBZUX8
; break;
4556 SDValue Chain
= LD
->getChain();
4557 SDValue Base
= LD
->getBasePtr();
4558 SDValue Ops
[] = { Base
, Offset
, Chain
};
4559 SDNode
*MN
= CurDAG
->getMachineNode(
4560 Opcode
, dl
, LD
->getValueType(0),
4561 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
4562 transferMemOperands(N
, MN
);
4569 unsigned Imm
, Imm2
, SH
, MB
, ME
;
4572 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4573 // with a mask, emit rlwinm
4574 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4575 isRotateAndMask(N
->getOperand(0).getNode(), Imm
, false, SH
, MB
, ME
)) {
4576 SDValue Val
= N
->getOperand(0).getOperand(0);
4577 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4578 getI32Imm(ME
, dl
) };
4579 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4582 // If this is just a masked value where the input is not handled above, and
4583 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4584 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4585 isRunOfOnes(Imm
, MB
, ME
) &&
4586 N
->getOperand(0).getOpcode() != ISD::ROTL
) {
4587 SDValue Val
= N
->getOperand(0);
4588 SDValue Ops
[] = { Val
, getI32Imm(0, dl
), getI32Imm(MB
, dl
),
4589 getI32Imm(ME
, dl
) };
4590 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4593 // If this is a 64-bit zero-extension mask, emit rldicl.
4594 if (isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) &&
4596 SDValue Val
= N
->getOperand(0);
4597 MB
= 64 - countTrailingOnes(Imm64
);
4600 if (Val
.getOpcode() == ISD::ANY_EXTEND
) {
4601 auto Op0
= Val
.getOperand(0);
4602 if ( Op0
.getOpcode() == ISD::SRL
&&
4603 isInt32Immediate(Op0
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
4605 auto ResultType
= Val
.getNode()->getValueType(0);
4606 auto ImDef
= CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
,
4608 SDValue
IDVal (ImDef
, 0);
4610 Val
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
,
4611 ResultType
, IDVal
, Op0
.getOperand(0),
4612 getI32Imm(1, dl
)), 0);
4617 // If the operand is a logical right shift, we can fold it into this
4618 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4619 // for n <= mb. The right shift is really a left rotate followed by a
4620 // mask, and this mask is a more-restrictive sub-mask of the mask implied
4622 if (Val
.getOpcode() == ISD::SRL
&&
4623 isInt32Immediate(Val
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
4624 assert(Imm
< 64 && "Illegal shift amount");
4625 Val
= Val
.getOperand(0);
4629 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
) };
4630 CurDAG
->SelectNodeTo(N
, PPC::RLDICL
, MVT::i64
, Ops
);
4633 // If this is a negated 64-bit zero-extension mask,
4634 // i.e. the immediate is a sequence of ones from most significant side
4635 // and all zero for remainder, we should use rldicr.
4636 if (isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) &&
4637 isMask_64(~Imm64
)) {
4638 SDValue Val
= N
->getOperand(0);
4639 MB
= 63 - countTrailingOnes(~Imm64
);
4641 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
) };
4642 CurDAG
->SelectNodeTo(N
, PPC::RLDICR
, MVT::i64
, Ops
);
4646 // AND X, 0 -> 0, not "rlwinm 32".
4647 if (isInt32Immediate(N
->getOperand(1), Imm
) && (Imm
== 0)) {
4648 ReplaceUses(SDValue(N
, 0), N
->getOperand(1));
4651 // ISD::OR doesn't get all the bitfield insertion fun.
4652 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4654 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
4655 N
->getOperand(0).getOpcode() == ISD::OR
&&
4656 isInt32Immediate(N
->getOperand(0).getOperand(1), Imm2
)) {
4657 // The idea here is to check whether this is equivalent to:
4658 // (c1 & m) | (x & ~m)
4659 // where m is a run-of-ones mask. The logic here is that, for each bit in
4661 // - if both are 1, then the output will be 1.
4662 // - if both are 0, then the output will be 0.
4663 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4665 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4667 // If that last condition is never the case, then we can form m from the
4668 // bits that are the same between c1 and c2.
4670 if (isRunOfOnes(~(Imm
^Imm2
), MB
, ME
) && !(~Imm
& Imm2
)) {
4671 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4672 N
->getOperand(0).getOperand(1),
4673 getI32Imm(0, dl
), getI32Imm(MB
, dl
),
4674 getI32Imm(ME
, dl
) };
4675 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
4680 // Other cases are autogenerated.
4684 if (N
->getValueType(0) == MVT::i32
)
4685 if (tryBitfieldInsert(N
))
4689 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
4690 isIntS16Immediate(N
->getOperand(1), Imm
)) {
4691 KnownBits LHSKnown
= CurDAG
->computeKnownBits(N
->getOperand(0));
4693 // If this is equivalent to an add, then we can fold it with the
4694 // FrameIndex calculation.
4695 if ((LHSKnown
.Zero
.getZExtValue()|~(uint64_t)Imm
) == ~0ULL) {
4696 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
4701 // OR with a 32-bit immediate can be handled by ori + oris
4702 // without creating an immediate in a GPR.
4704 bool IsPPC64
= PPCSubTarget
->isPPC64();
4705 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
4706 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
4707 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
4708 uint64_t ImmHi
= Imm64
>> 16;
4709 uint64_t ImmLo
= Imm64
& 0xFFFF;
4710 if (ImmHi
!= 0 && ImmLo
!= 0) {
4711 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
4713 getI16Imm(ImmLo
, dl
));
4714 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
4715 CurDAG
->SelectNodeTo(N
, PPC::ORIS8
, MVT::i64
, Ops1
);
4720 // Other cases are autogenerated.
4724 // XOR with a 32-bit immediate can be handled by xori + xoris
4725 // without creating an immediate in a GPR.
4727 bool IsPPC64
= PPCSubTarget
->isPPC64();
4728 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
4729 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
4730 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
4731 uint64_t ImmHi
= Imm64
>> 16;
4732 uint64_t ImmLo
= Imm64
& 0xFFFF;
4733 if (ImmHi
!= 0 && ImmLo
!= 0) {
4734 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
4736 getI16Imm(ImmLo
, dl
));
4737 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
4738 CurDAG
->SelectNodeTo(N
, PPC::XORIS8
, MVT::i64
, Ops1
);
4747 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
4748 isIntS16Immediate(N
->getOperand(1), Imm
)) {
4749 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
4756 unsigned Imm
, SH
, MB
, ME
;
4757 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
4758 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
4759 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4760 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4761 getI32Imm(ME
, dl
) };
4762 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4766 // Other cases are autogenerated.
4770 unsigned Imm
, SH
, MB
, ME
;
4771 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
4772 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
4773 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
4774 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4775 getI32Imm(ME
, dl
) };
4776 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4780 // Other cases are autogenerated.
4783 // FIXME: Remove this once the ANDI glue bug is fixed:
4784 case PPCISD::ANDIo_1_EQ_BIT
:
4785 case PPCISD::ANDIo_1_GT_BIT
: {
4789 EVT InVT
= N
->getOperand(0).getValueType();
4790 assert((InVT
== MVT::i64
|| InVT
== MVT::i32
) &&
4791 "Invalid input type for ANDIo_1_EQ_BIT");
4793 unsigned Opcode
= (InVT
== MVT::i64
) ? PPC::ANDIo8
: PPC::ANDIo
;
4794 SDValue
AndI(CurDAG
->getMachineNode(Opcode
, dl
, InVT
, MVT::Glue
,
4796 CurDAG
->getTargetConstant(1, dl
, InVT
)),
4798 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
4800 CurDAG
->getTargetConstant(N
->getOpcode() == PPCISD::ANDIo_1_EQ_BIT
?
4801 PPC::sub_eq
: PPC::sub_gt
, dl
, MVT::i32
);
4803 CurDAG
->SelectNodeTo(N
, TargetOpcode::EXTRACT_SUBREG
, MVT::i1
, CR0Reg
,
4804 SRIdxVal
, SDValue(AndI
.getNode(), 1) /* glue */);
4807 case ISD::SELECT_CC
: {
4808 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(4))->get();
4810 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
4811 bool isPPC64
= (PtrVT
== MVT::i64
);
4813 // If this is a select of i1 operands, we'll pattern match it.
4814 if (PPCSubTarget
->useCRBits() &&
4815 N
->getOperand(0).getValueType() == MVT::i1
)
4818 if (PPCSubTarget
->isISA3_0() && PPCSubTarget
->isPPC64()) {
4819 bool NeedSwapOps
= false;
4820 bool IsUnCmp
= false;
4821 if (mayUseP9Setb(N
, CC
, CurDAG
, NeedSwapOps
, IsUnCmp
)) {
4822 SDValue LHS
= N
->getOperand(0);
4823 SDValue RHS
= N
->getOperand(1);
4825 std::swap(LHS
, RHS
);
4827 // Use SelectCC to generate the comparison that sets the CR bits. For
4828 // equality comparisons with one literal operand, SelectCC may avoid
4829 // materializing the whole literal and instead check it with xoris
4830 // first, in which case the resulting comparison cannot exactly
4831 // represent the GT/LT relationship. To avoid this we specify
4832 // SETGT/SETUGT here instead of SETEQ.
4834 SelectCC(LHS
, RHS
, IsUnCmp
? ISD::SETUGT
: ISD::SETGT
, dl
);
4835 CurDAG
->SelectNodeTo(
4836 N
, N
->getSimpleValueType(0) == MVT::i64
? PPC::SETB8
: PPC::SETB
,
4837 N
->getValueType(0), GenCC
);
4843 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
4845 if (ConstantSDNode
*N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1)))
4846 if (ConstantSDNode
*N2C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(2)))
4847 if (ConstantSDNode
*N3C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(3)))
4848 if (N1C
->isNullValue() && N3C
->isNullValue() &&
4849 N2C
->getZExtValue() == 1ULL && CC
== ISD::SETNE
&&
4850 // FIXME: Implement this optzn for PPC64.
4851 N
->getValueType(0) == MVT::i32
) {
4853 CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4854 N
->getOperand(0), getI32Imm(~0U, dl
));
4855 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(Tmp
, 0),
4856 N
->getOperand(0), SDValue(Tmp
, 1));
4860 SDValue CCReg
= SelectCC(N
->getOperand(0), N
->getOperand(1), CC
, dl
);
4862 if (N
->getValueType(0) == MVT::i1
) {
4863 // An i1 select is: (c & t) | (!c & f).
4865 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
4869 default: llvm_unreachable("Invalid CC index");
4870 case 0: SRI
= PPC::sub_lt
; break;
4871 case 1: SRI
= PPC::sub_gt
; break;
4872 case 2: SRI
= PPC::sub_eq
; break;
4873 case 3: SRI
= PPC::sub_un
; break;
4876 SDValue CCBit
= CurDAG
->getTargetExtractSubreg(SRI
, dl
, MVT::i1
, CCReg
);
4878 SDValue
NotCCBit(CurDAG
->getMachineNode(PPC::CRNOR
, dl
, MVT::i1
,
4880 SDValue C
= Inv
? NotCCBit
: CCBit
,
4881 NotC
= Inv
? CCBit
: NotCCBit
;
4883 SDValue
CAndT(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
4884 C
, N
->getOperand(2)), 0);
4885 SDValue
NotCAndF(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
4886 NotC
, N
->getOperand(3)), 0);
4888 CurDAG
->SelectNodeTo(N
, PPC::CROR
, MVT::i1
, CAndT
, NotCAndF
);
4892 unsigned BROpc
= getPredicateForSetCC(CC
);
4894 unsigned SelectCCOp
;
4895 if (N
->getValueType(0) == MVT::i32
)
4896 SelectCCOp
= PPC::SELECT_CC_I4
;
4897 else if (N
->getValueType(0) == MVT::i64
)
4898 SelectCCOp
= PPC::SELECT_CC_I8
;
4899 else if (N
->getValueType(0) == MVT::f32
) {
4900 if (PPCSubTarget
->hasP8Vector())
4901 SelectCCOp
= PPC::SELECT_CC_VSSRC
;
4902 else if (PPCSubTarget
->hasSPE())
4903 SelectCCOp
= PPC::SELECT_CC_SPE4
;
4905 SelectCCOp
= PPC::SELECT_CC_F4
;
4906 } else if (N
->getValueType(0) == MVT::f64
) {
4907 if (PPCSubTarget
->hasVSX())
4908 SelectCCOp
= PPC::SELECT_CC_VSFRC
;
4909 else if (PPCSubTarget
->hasSPE())
4910 SelectCCOp
= PPC::SELECT_CC_SPE
;
4912 SelectCCOp
= PPC::SELECT_CC_F8
;
4913 } else if (N
->getValueType(0) == MVT::f128
)
4914 SelectCCOp
= PPC::SELECT_CC_F16
;
4915 else if (PPCSubTarget
->hasSPE())
4916 SelectCCOp
= PPC::SELECT_CC_SPE
;
4917 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4f64
)
4918 SelectCCOp
= PPC::SELECT_CC_QFRC
;
4919 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4f32
)
4920 SelectCCOp
= PPC::SELECT_CC_QSRC
;
4921 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4i1
)
4922 SelectCCOp
= PPC::SELECT_CC_QBRC
;
4923 else if (N
->getValueType(0) == MVT::v2f64
||
4924 N
->getValueType(0) == MVT::v2i64
)
4925 SelectCCOp
= PPC::SELECT_CC_VSRC
;
4927 SelectCCOp
= PPC::SELECT_CC_VRRC
;
4929 SDValue Ops
[] = { CCReg
, N
->getOperand(2), N
->getOperand(3),
4930 getI32Imm(BROpc
, dl
) };
4931 CurDAG
->SelectNodeTo(N
, SelectCCOp
, N
->getValueType(0), Ops
);
4934 case ISD::VECTOR_SHUFFLE
:
4935 if (PPCSubTarget
->hasVSX() && (N
->getValueType(0) == MVT::v2f64
||
4936 N
->getValueType(0) == MVT::v2i64
)) {
4937 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(N
);
4939 SDValue Op1
= N
->getOperand(SVN
->getMaskElt(0) < 2 ? 0 : 1),
4940 Op2
= N
->getOperand(SVN
->getMaskElt(1) < 2 ? 0 : 1);
4943 for (int i
= 0; i
< 2; ++i
)
4944 if (SVN
->getMaskElt(i
) <= 0 || SVN
->getMaskElt(i
) == 2)
4949 if (Op1
== Op2
&& DM
[0] == 0 && DM
[1] == 0 &&
4950 Op1
.getOpcode() == ISD::SCALAR_TO_VECTOR
&&
4951 isa
<LoadSDNode
>(Op1
.getOperand(0))) {
4952 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op1
.getOperand(0));
4953 SDValue Base
, Offset
;
4955 if (LD
->isUnindexed() && LD
->hasOneUse() && Op1
.hasOneUse() &&
4956 (LD
->getMemoryVT() == MVT::f64
||
4957 LD
->getMemoryVT() == MVT::i64
) &&
4958 SelectAddrIdxOnly(LD
->getBasePtr(), Base
, Offset
)) {
4959 SDValue Chain
= LD
->getChain();
4960 SDValue Ops
[] = { Base
, Offset
, Chain
};
4961 MachineMemOperand
*MemOp
= LD
->getMemOperand();
4962 SDNode
*NewN
= CurDAG
->SelectNodeTo(N
, PPC::LXVDSX
,
4963 N
->getValueType(0), Ops
);
4964 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(NewN
), {MemOp
});
4969 // For little endian, we must swap the input operands and adjust
4970 // the mask elements (reverse and invert them).
4971 if (PPCSubTarget
->isLittleEndian()) {
4972 std::swap(Op1
, Op2
);
4973 unsigned tmp
= DM
[0];
4978 SDValue DMV
= CurDAG
->getTargetConstant(DM
[1] | (DM
[0] << 1), dl
,
4980 SDValue Ops
[] = { Op1
, Op2
, DMV
};
4981 CurDAG
->SelectNodeTo(N
, PPC::XXPERMDI
, N
->getValueType(0), Ops
);
4988 bool IsPPC64
= PPCSubTarget
->isPPC64();
4989 SDValue Ops
[] = { N
->getOperand(1), N
->getOperand(0) };
4990 CurDAG
->SelectNodeTo(N
, N
->getOpcode() == PPCISD::BDNZ
4991 ? (IsPPC64
? PPC::BDNZ8
: PPC::BDNZ
)
4992 : (IsPPC64
? PPC::BDZ8
: PPC::BDZ
),
4996 case PPCISD::COND_BRANCH
: {
4997 // Op #0 is the Chain.
4998 // Op #1 is the PPC::PRED_* number.
5000 // Op #3 is the Dest MBB
5001 // Op #4 is the Flag.
5002 // Prevent PPC::PRED_* from being selected into LI.
5003 unsigned PCC
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
5004 if (EnableBranchHint
)
5005 PCC
|= getBranchHint(PCC
, FuncInfo
, N
->getOperand(3));
5007 SDValue Pred
= getI32Imm(PCC
, dl
);
5008 SDValue Ops
[] = { Pred
, N
->getOperand(2), N
->getOperand(3),
5009 N
->getOperand(0), N
->getOperand(4) };
5010 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
5014 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(1))->get();
5015 unsigned PCC
= getPredicateForSetCC(CC
);
5017 if (N
->getOperand(2).getValueType() == MVT::i1
) {
5021 default: llvm_unreachable("Unexpected Boolean-operand predicate");
5022 case PPC::PRED_LT
: Opc
= PPC::CRANDC
; Swap
= true; break;
5023 case PPC::PRED_LE
: Opc
= PPC::CRORC
; Swap
= true; break;
5024 case PPC::PRED_EQ
: Opc
= PPC::CREQV
; Swap
= false; break;
5025 case PPC::PRED_GE
: Opc
= PPC::CRORC
; Swap
= false; break;
5026 case PPC::PRED_GT
: Opc
= PPC::CRANDC
; Swap
= false; break;
5027 case PPC::PRED_NE
: Opc
= PPC::CRXOR
; Swap
= false; break;
5030 // A signed comparison of i1 values produces the opposite result to an
5031 // unsigned one if the condition code includes less-than or greater-than.
5032 // This is because 1 is the most negative signed i1 number and the most
5033 // positive unsigned i1 number. The CR-logical operations used for such
5034 // comparisons are non-commutative so for signed comparisons vs. unsigned
5035 // ones, the input operands just need to be swapped.
5036 if (ISD::isSignedIntSetCC(CC
))
5039 SDValue
BitComp(CurDAG
->getMachineNode(Opc
, dl
, MVT::i1
,
5040 N
->getOperand(Swap
? 3 : 2),
5041 N
->getOperand(Swap
? 2 : 3)), 0);
5042 CurDAG
->SelectNodeTo(N
, PPC::BC
, MVT::Other
, BitComp
, N
->getOperand(4),
5047 if (EnableBranchHint
)
5048 PCC
|= getBranchHint(PCC
, FuncInfo
, N
->getOperand(4));
5050 SDValue CondCode
= SelectCC(N
->getOperand(2), N
->getOperand(3), CC
, dl
);
5051 SDValue Ops
[] = { getI32Imm(PCC
, dl
), CondCode
,
5052 N
->getOperand(4), N
->getOperand(0) };
5053 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
5057 // FIXME: Should custom lower this.
5058 SDValue Chain
= N
->getOperand(0);
5059 SDValue Target
= N
->getOperand(1);
5060 unsigned Opc
= Target
.getValueType() == MVT::i32
? PPC::MTCTR
: PPC::MTCTR8
;
5061 unsigned Reg
= Target
.getValueType() == MVT::i32
? PPC::BCTR
: PPC::BCTR8
;
5062 Chain
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Glue
, Target
,
5064 CurDAG
->SelectNodeTo(N
, Reg
, MVT::Other
, Chain
);
5067 case PPCISD::TOC_ENTRY
: {
5068 assert ((PPCSubTarget
->isPPC64() || PPCSubTarget
->isSVR4ABI()) &&
5069 "Only supported for 64-bit ABI and 32-bit SVR4");
5070 if (PPCSubTarget
->isSVR4ABI() && !PPCSubTarget
->isPPC64()) {
5071 SDValue GA
= N
->getOperand(0);
5072 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LWZtoc
, dl
, MVT::i32
, GA
,
5074 transferMemOperands(N
, MN
);
5079 // For medium and large code model, we generate two instructions as
5080 // described below. Otherwise we allow SelectCodeCommon to handle this,
5081 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
5082 CodeModel::Model CModel
= TM
.getCodeModel();
5083 if (CModel
!= CodeModel::Medium
&& CModel
!= CodeModel::Large
)
5086 // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
5087 // If it must be toc-referenced according to PPCSubTarget, we generate:
5088 // LDtocL(@sym, ADDIStocHA(%x2, @sym))
5089 // Otherwise we generate:
5090 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
5091 SDValue GA
= N
->getOperand(0);
5092 SDValue TOCbase
= N
->getOperand(1);
5093 SDNode
*Tmp
= CurDAG
->getMachineNode(PPC::ADDIStocHA
, dl
, MVT::i64
,
5095 if (PPCLowering
->isAccessedAsGotIndirect(GA
)) {
5096 // If it is accessed as got-indirect, we need an extra LD to load
5098 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LDtocL
, dl
, MVT::i64
, GA
,
5100 transferMemOperands(N
, MN
);
5105 // Build the address relative to the TOC-pointer.
5106 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ADDItocL
, dl
, MVT::i64
,
5107 SDValue(Tmp
, 0), GA
));
5110 case PPCISD::PPC32_PICGOT
:
5111 // Generate a PIC-safe GOT reference.
5112 assert(!PPCSubTarget
->isPPC64() && PPCSubTarget
->isSVR4ABI() &&
5113 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
5114 CurDAG
->SelectNodeTo(N
, PPC::PPC32PICGOT
,
5115 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()),
5119 case PPCISD::VADD_SPLAT
: {
5120 // This expands into one of three sequences, depending on whether
5121 // the first operand is odd or even, positive or negative.
5122 assert(isa
<ConstantSDNode
>(N
->getOperand(0)) &&
5123 isa
<ConstantSDNode
>(N
->getOperand(1)) &&
5124 "Invalid operand on VADD_SPLAT!");
5126 int Elt
= N
->getConstantOperandVal(0);
5127 int EltSize
= N
->getConstantOperandVal(1);
5128 unsigned Opc1
, Opc2
, Opc3
;
5132 Opc1
= PPC::VSPLTISB
;
5133 Opc2
= PPC::VADDUBM
;
5134 Opc3
= PPC::VSUBUBM
;
5136 } else if (EltSize
== 2) {
5137 Opc1
= PPC::VSPLTISH
;
5138 Opc2
= PPC::VADDUHM
;
5139 Opc3
= PPC::VSUBUHM
;
5142 assert(EltSize
== 4 && "Invalid element size on VADD_SPLAT!");
5143 Opc1
= PPC::VSPLTISW
;
5144 Opc2
= PPC::VADDUWM
;
5145 Opc3
= PPC::VSUBUWM
;
5149 if ((Elt
& 1) == 0) {
5150 // Elt is even, in the range [-32,-18] + [16,30].
5152 // Convert: VADD_SPLAT elt, size
5153 // Into: tmp = VSPLTIS[BHW] elt
5154 // VADDU[BHW]M tmp, tmp
5155 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
5156 SDValue EltVal
= getI32Imm(Elt
>> 1, dl
);
5157 SDNode
*Tmp
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5158 SDValue TmpVal
= SDValue(Tmp
, 0);
5159 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, TmpVal
, TmpVal
));
5161 } else if (Elt
> 0) {
5162 // Elt is odd and positive, in the range [17,31].
5164 // Convert: VADD_SPLAT elt, size
5165 // Into: tmp1 = VSPLTIS[BHW] elt-16
5166 // tmp2 = VSPLTIS[BHW] -16
5167 // VSUBU[BHW]M tmp1, tmp2
5168 SDValue EltVal
= getI32Imm(Elt
- 16, dl
);
5169 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5170 EltVal
= getI32Imm(-16, dl
);
5171 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5172 ReplaceNode(N
, CurDAG
->getMachineNode(Opc3
, dl
, VT
, SDValue(Tmp1
, 0),
5176 // Elt is odd and negative, in the range [-31,-17].
5178 // Convert: VADD_SPLAT elt, size
5179 // Into: tmp1 = VSPLTIS[BHW] elt+16
5180 // tmp2 = VSPLTIS[BHW] -16
5181 // VADDU[BHW]M tmp1, tmp2
5182 SDValue EltVal
= getI32Imm(Elt
+ 16, dl
);
5183 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5184 EltVal
= getI32Imm(-16, dl
);
5185 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5186 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, SDValue(Tmp1
, 0),
5196 // If the target supports the cmpb instruction, do the idiom recognition here.
5197 // We don't do this as a DAG combine because we don't want to do it as nodes
5198 // are being combined (because we might miss part of the eventual idiom). We
5199 // don't want to do it during instruction selection because we want to reuse
5200 // the logic for lowering the masking operations already part of the
5201 // instruction selector.
// Tries to replace an OR-rooted group of byte-wise SELECT_CC comparisons with
// a single PPCISD::CMPB node, followed where needed by AND/XOR with constants
// derived from the accumulated Mask and Alt values.
// NOTE(review): the embedded source line numbers in this listing skip (for
// example 5202 -> 5205), so some statements (early exits, closing braces, a
// few declarations) are not visible here. The comments below describe only
// the code that is shown.
5202 SDValue
PPCDAGToDAGISel::combineToCMPB(SDNode
*N
) {
5205 assert(N
->getOpcode() == ISD::OR
&&
5206 "Only OR nodes are supported for CMPB");
// The subtarget must provide the cmpb instruction.
5209 if (!PPCSubTarget
->hasCMPB())
// Only i32 and i64 result types are examined.
5212 if (N
->getValueType(0) != MVT::i32
&&
5213 N
->getValueType(0) != MVT::i64
)
5216 EVT VT
= N
->getValueType(0);
// BytesFound has one flag per byte position 0-7. Mask and Alt are used at the
// end of the function as the AND/XOR constants applied to the CMPB result.
5219 bool BytesFound
[8] = {false, false, false, false, false, false, false, false};
5220 uint64_t Mask
= 0, Alt
= 0;
// Matcher for a single candidate operand O. The byte index b, Mask, Alt, LHS
// and RHS are all passed by reference.
// NOTE(review): the lambda's return statements are not visible in this listing.
5222 auto IsByteSelectCC
= [this](SDValue O
, unsigned &b
,
5223 uint64_t &Mask
, uint64_t &Alt
,
5224 SDValue
&LHS
, SDValue
&RHS
) {
// Only SELECT_CC nodes are candidates.
5225 if (O
.getOpcode() != ISD::SELECT_CC
)
5227 ISD::CondCode CC
= cast
<CondCodeSDNode
>(O
.getOperand(4))->get();
// Operands 2 and 3 are both required to be constants.
5229 if (!isa
<ConstantSDNode
>(O
.getOperand(2)) ||
5230 !isa
<ConstantSDNode
>(O
.getOperand(3)))
5233 uint64_t PM
= O
.getConstantOperandVal(2);
5234 uint64_t PAlt
= O
.getConstantOperandVal(3);
// Look for a byte position b such that PM is non-zero and neither PM nor PAlt
// has any bit set outside byte b.
// NOTE(review): the local Mask declared inside this loop shadows the lambda's
// Mask reference parameter for the duration of the loop body.
5235 for (b
= 0; b
< 8; ++b
) {
5236 uint64_t Mask
= UINT64_C(0xFF) << (8*b
);
5237 if (PM
&& (PM
& Mask
) == PM
&& (PAlt
& Mask
) == PAlt
)
// Case: operand 1 is not the constant zero.
5246 if (!isa
<ConstantSDNode
>(O
.getOperand(1)) ||
5247 O
.getConstantOperandVal(1) != 0) {
// Look through a TRUNCATE on either compared operand.
5248 SDValue Op0
= O
.getOperand(0), Op1
= O
.getOperand(1);
5249 if (Op0
.getOpcode() == ISD::TRUNCATE
)
5250 Op0
= Op0
.getOperand(0);
5251 if (Op1
.getOpcode() == ISD::TRUNCATE
)
5252 Op1
= Op1
.getOperand(0);
// Both sides are SRL by the same constant amount and the condition is SETEQ.
5254 if (Op0
.getOpcode() == ISD::SRL
&& Op1
.getOpcode() == ISD::SRL
&&
5255 Op0
.getOperand(1) == Op1
.getOperand(1) && CC
== ISD::SETEQ
&&
5256 isa
<ConstantSDNode
>(Op0
.getOperand(1))) {
5258 unsigned Bits
= Op0
.getValueSizeInBits();
// The shift amount is compared against Bits-8.
5261 if (Op0
.getConstantOperandVal(1) != Bits
-8)
// The values being shifted become the comparison operands.
5264 LHS
= Op0
.getOperand(0);
5265 RHS
= Op1
.getOperand(0);
5269 // When we have small integers (i16 to be specific), the form present
5270 // post-legalization uses SETULT in the SELECT_CC for the
5271 // higher-order byte, depending on the fact that the
5272 // even-higher-order bytes are known to all be zero, for example:
5273 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
5274 // (so when the second byte is the same, because all higher-order
5275 // bits from bytes 3 and 4 are known to be zero, the result of the
5276 // xor can be at most 255)
5277 if (Op0
.getOpcode() == ISD::XOR
&& CC
== ISD::SETULT
&&
5278 isa
<ConstantSDNode
>(O
.getOperand(1))) {
// The unsigned limit is compared against 1 << (8*b).
5280 uint64_t ULim
= O
.getConstantOperandVal(1);
5281 if (ULim
!= (UINT64_C(1) << b
*8))
5284 // Now we need to make sure that the upper bytes are known to be
5286 unsigned Bits
= Op0
.getValueSizeInBits();
5287 if (!CurDAG
->MaskedValueIsZero(
5288 Op0
, APInt::getHighBitsSet(Bits
, Bits
- (b
+ 1) * 8)))
// The two XOR operands become the comparison operands.
5291 LHS
= Op0
.getOperand(0);
5292 RHS
= Op0
.getOperand(1);
// CC is tested against SETEQ before the remaining forms are examined.
5299 if (CC
!= ISD::SETEQ
)
5302 SDValue Op
= O
.getOperand(0);
// Form: (and (xor LHS, RHS), 0xFF << (8*b)), looking through a TRUNCATE of
// the xor.
5303 if (Op
.getOpcode() == ISD::AND
) {
5304 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
5306 if (Op
.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b
)))
5309 SDValue XOR
= Op
.getOperand(0);
5310 if (XOR
.getOpcode() == ISD::TRUNCATE
)
5311 XOR
= XOR
.getOperand(0);
5312 if (XOR
.getOpcode() != ISD::XOR
)
5315 LHS
= XOR
.getOperand(0);
5316 RHS
= XOR
.getOperand(1);
// Form: (srl (xor LHS, RHS), <constant>), with the shift amount compared
// against Bits-8, again looking through a TRUNCATE of the xor.
5318 } else if (Op
.getOpcode() == ISD::SRL
) {
5319 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
5321 unsigned Bits
= Op
.getValueSizeInBits();
5324 if (Op
.getConstantOperandVal(1) != Bits
-8)
5327 SDValue XOR
= Op
.getOperand(0);
5328 if (XOR
.getOpcode() == ISD::TRUNCATE
)
5329 XOR
= XOR
.getOperand(0);
5330 if (XOR
.getOpcode() != ISD::XOR
)
5333 LHS
= XOR
.getOperand(0);
5334 RHS
= XOR
.getOperand(1);
// Worklist traversal seeded with N: each popped value has all of its operands
// examined.
5341 SmallVector
<SDValue
, 8> Queue(1, SDValue(N
, 0));
5342 while (!Queue
.empty()) {
5343 SDValue V
= Queue
.pop_back_val();
5345 for (const SDValue
&O
: V
.getNode()->ops()) {
5347 uint64_t M
= 0, A
= 0;
// OR operands and operands accepted by IsByteSelectCC are handled separately.
// The body of the OR branch is not shown in this listing.
5349 if (O
.getOpcode() == ISD::OR
) {
5351 } else if (IsByteSelectCC(O
, b
, M
, A
, OLHS
, ORHS
)) {
// Byte b is marked as found at two sites; the second requires the new operand
// pair (OLHS, ORHS) to equal the existing (LHS, RHS) pair in either order.
5355 BytesFound
[b
] = true;
5358 } else if ((LHS
== ORHS
&& RHS
== OLHS
) ||
5359 (RHS
== ORHS
&& LHS
== OLHS
)) {
5360 BytesFound
[b
] = true;
// NOTE(review): the loop below indexes BytesFound with LastB rather than the
// loop variable i. The loop body is not visible in this listing, so confirm
// against the full source whether BytesFound[i] was intended.
5372 unsigned LastB
= 0, BCnt
= 0;
5373 for (unsigned i
= 0; i
< 8; ++i
)
5374 if (BytesFound
[LastB
]) {
5379 if (!LastB
|| BCnt
< 2)
5382 // Because we'll be zero-extending the output anyway if we don't have a
5383 // specific value for each input byte (via the Mask), we can 'anyext' the inputs.
5384 if (LHS
.getValueType() != VT
) {
5385 LHS
= CurDAG
->getAnyExtOrTrunc(LHS
, dl
, VT
);
5386 RHS
= CurDAG
->getAnyExtOrTrunc(RHS
, dl
, VT
);
// Build the CMPB node from the two operands.
5389 Res
= CurDAG
->getNode(PPCISD::CMPB
, dl
, VT
, LHS
, RHS
);
// The mask counts as non-trivial unless all 64 bits of Mask are set.
5391 bool NonTrivialMask
= ((int64_t) Mask
) != INT64_C(-1);
5392 if (NonTrivialMask
&& !Alt
) {
5393 // Res = Mask & CMPB
5394 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
5395 CurDAG
->getConstant(Mask
, dl
, VT
));
5397 // Res = (CMPB & Mask) | (~CMPB & Alt)
5398 // Which, as suggested here:
5399 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
5400 // can be written as:
5401 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
5402 // useful because the (Alt ^ Mask) can be pre-computed.
5403 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
5404 CurDAG
->getConstant(Mask
^ Alt
, dl
, VT
));
5405 Res
= CurDAG
->getNode(ISD::XOR
, dl
, VT
, Res
,
5406 CurDAG
->getConstant(Alt
, dl
, VT
));
5412 // When CR bit registers are enabled, an extension of an i1 variable to an i32
5413 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5414 // involves constant materialization of a 0 or a 1 or both. If the result of
5415 // the extension is then operated upon by some operator that can be constant
5416 // folded with a constant 0 or 1, and that constant can be materialized using
5417 // only one instruction (like a zero or one), then we should fold in those
5418 // operations with the select.
// Res and N are both passed by reference. Res is assigned a new select built
// from constant-folded true/false results.
// NOTE(review): the embedded source line numbers in this listing skip, so
// early exits, the opening of the do-loop and some declarations are not
// visible here. The comments below describe only the code that is shown.
5419 void PPCDAGToDAGISel::foldBoolExts(SDValue
&Res
, SDNode
*&N
) {
// Only relevant when the subtarget uses CR bits.
5420 if (!PPCSubTarget
->useCRBits())
// N is checked for being a zero-, sign- or any-extension ...
5423 if (N
->getOpcode() != ISD::ZERO_EXTEND
&&
5424 N
->getOpcode() != ISD::SIGN_EXTEND
&&
5425 N
->getOpcode() != ISD::ANY_EXTEND
)
// ... whose input has type i1 ...
5428 if (N
->getOperand(0).getValueType() != MVT::i1
)
// ... and which has exactly one use.
5431 if (!N
->hasOneUse())
5435 EVT VT
= N
->getValueType(0);
5436 SDValue Cond
= N
->getOperand(0);
// Constant for the extended "true" value: -1 for SIGN_EXTEND, otherwise 1.
5438 CurDAG
->getConstant(N
->getOpcode() == ISD::SIGN_EXTEND
? -1 : 1, dl
, VT
);
5439 SDValue ConstFalse
= CurDAG
->getConstant(0, dl
, VT
);
// Inspect the first entry in N's use list; it is checked for having exactly
// two operands.
5442 SDNode
*User
= *N
->use_begin();
5443 if (User
->getNumOperands() != 2)
// Substitute Val for each operand of User that is N, then ask the DAG to
// constant-fold User's opcode over the resulting operand pair.
5446 auto TryFold
= [this, N
, User
, dl
](SDValue Val
) {
5447 SDValue UserO0
= User
->getOperand(0), UserO1
= User
->getOperand(1);
5448 SDValue O0
= UserO0
.getNode() == N
? Val
: UserO0
;
5449 SDValue O1
= UserO1
.getNode() == N
? Val
: UserO1
;
5451 return CurDAG
->FoldConstantArithmetic(User
->getOpcode(), dl
,
5452 User
->getValueType(0),
5453 O0
.getNode(), O1
.getNode());
5456 // FIXME: When the semantics of the interaction between select and undef
5457 // are clearly defined, it may turn out to be unnecessary to break here.
5458 SDValue TrueRes
= TryFold(ConstTrue
);
5459 if (!TrueRes
|| TrueRes
.isUndef())
5461 SDValue FalseRes
= TryFold(ConstFalse
);
5462 if (!FalseRes
|| FalseRes
.isUndef())
5465 // For us to materialize these using one instruction, we must be able to
5466 // represent them as signed 16-bit integers.
// NOTE(review): the values are read with getZExtValue() into uint64_t before
// the isInt<16> test; confirm that negative constants of types narrower than
// 64 bits are classified as intended.
5467 uint64_t True
= cast
<ConstantSDNode
>(TrueRes
)->getZExtValue(),
5468 False
= cast
<ConstantSDNode
>(FalseRes
)->getZExtValue();
5469 if (!isInt
<16>(True
) || !isInt
<16>(False
))
5472 // We can replace User with a new SELECT node, and try again to see if we
5473 // can fold the select with its user.
5474 Res
= CurDAG
->getSelect(dl
, User
->getValueType(0), Cond
, TrueRes
, FalseRes
);
// The folded results become the true/false constants for the next iteration,
// which runs while N still has exactly one use.
5476 ConstTrue
= TrueRes
;
5477 ConstFalse
= FalseRes
;
5478 } while (N
->hasOneUse());
// DAG preprocessing step: walks the node list backwards, applies
// combineToCMPB and foldBoolExts, and replaces uses of a node's value 0 with
// the produced result.
// NOTE(review): the embedded source line numbers in this listing skip, so the
// switch's case labels, the declaration of Res and the conditions guarding
// the replacement are not visible here.
5481 void PPCDAGToDAGISel::PreprocessISelDAG() {
// Start at the end of the node list and step backwards until the beginning.
5482 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
5484 bool MadeChange
= false;
5485 while (Position
!= CurDAG
->allnodes_begin()) {
5486 SDNode
*N
= &*--Position
;
// Dispatch on the node's opcode (case labels not shown in this listing).
5491 switch (N
->getOpcode()) {
5494 Res
= combineToCMPB(N
);
5499 foldBoolExts(Res
, N
);
// Debug-only trace of the node being replaced and its replacement.
5502 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
5503 LLVM_DEBUG(N
->dump(CurDAG
));
5504 LLVM_DEBUG(dbgs() << "\nNew: ");
5505 LLVM_DEBUG(Res
.getNode()->dump(CurDAG
));
5506 LLVM_DEBUG(dbgs() << "\n");
// Redirect every use of N's value 0 to Res.
5508 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), Res
);
// Remove nodes that have become dead.
5514 CurDAG
->RemoveDeadNodes();
5517 /// PostprocessISelDAG - Perform some late peephole optimizations
5518 /// on the DAG representation.
// NOTE(review): the embedded source line numbers in this listing skip
// (5521 -> 5526), so the statement guarded by the -O0 check and any calls
// before PeepholePPC64ZExt() are not visible here.
5519 void PPCDAGToDAGISel::PostprocessISelDAG() {
5520 // Skip peepholes at -O0.
5521 if (TM
.getOptLevel() == CodeGenOpt::None
)
// Run the PPC64 zero-extension peephole.
5526 PeepholePPC64ZExt();
5529 // Check if all users of this node will become isel where the second operand
5530 // is the constant zero. If this is so, and if we can negate the condition,
5531 // then we can flip the true and false operands. This will allow the zero to
5532 // be folded with the isel so that we don't need to materialize a register
// Examines every use of N.
// NOTE(review): the embedded source line numbers in this listing skip, so the
// return statements and the definition of User are not visible here. The
// comments below list only the checks that are shown.
5534 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode
*N
) {
5535 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
// The user is checked for being a machine node ...
5538 if (!User
->isMachineOpcode())
// ... with opcode SELECT_I4 or SELECT_I8.
5540 if (User
->getMachineOpcode() != PPC::SELECT_I4
&&
5541 User
->getMachineOpcode() != PPC::SELECT_I8
)
// Operand 2 of the user is checked for being a machine node ...
5544 SDNode
*Op2
= User
->getOperand(2).getNode();
5545 if (!Op2
->isMachineOpcode())
// ... with opcode LI or LI8 ...
5548 if (Op2
->getMachineOpcode() != PPC::LI
&&
5549 Op2
->getMachineOpcode() != PPC::LI8
)
// ... whose operand 0 is looked up as a ConstantSDNode and tested for zero.
5552 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op2
->getOperand(0));
5556 if (!C
->isNullValue())
// Replaces the users of N (asserted to be SELECT_I4 or SELECT_I8 machine
// nodes) with new nodes of the same opcode whose operands 1 and 2 are
// exchanged.
// NOTE(review): the embedded source line numbers in this listing skip, so the
// definitions of User and ResNode are not visible here.
5563 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode
*N
) {
// First pass: gather the users into ToReplace, presumably so that the use
// list is not modified while it is being iterated.
5564 SmallVector
<SDNode
*, 4> ToReplace
;
5565 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
5568 assert((User
->getMachineOpcode() == PPC::SELECT_I4
||
5569 User
->getMachineOpcode() == PPC::SELECT_I8
) &&
5570 "Must have all select users");
5571 ToReplace
.push_back(User
);
// Second pass: build each replacement and swap it in.
5574 for (SmallVector
<SDNode
*, 4>::iterator UI
= ToReplace
.begin(),
5575 UE
= ToReplace
.end(); UI
!= UE
; ++UI
) {
// Same opcode, location and result type; operand 0 is kept, and operands 2
// and 1 are passed in exchanged order.
5578 CurDAG
->getMachineNode(User
->getMachineOpcode(), SDLoc(User
),
5579 User
->getValueType(0), User
->getOperand(0),
5580 User
->getOperand(2),
5581 User
->getOperand(1));
// Debug-only trace of the old and new nodes.
5583 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
5584 LLVM_DEBUG(User
->dump(CurDAG
));
5585 LLVM_DEBUG(dbgs() << "\nNew: ");
5586 LLVM_DEBUG(ResNode
->dump(CurDAG
));
5587 LLVM_DEBUG(dbgs() << "\n");
5589 ReplaceUses(User
, ResNode
);
5593 void PPCDAGToDAGISel::PeepholeCROps() {
5597 for (SDNode
&Node
: CurDAG
->allnodes()) {
5598 MachineSDNode
*MachineNode
= dyn_cast
<MachineSDNode
>(&Node
);
5599 if (!MachineNode
|| MachineNode
->use_empty())
5601 SDNode
*ResNode
= MachineNode
;
5603 bool Op1Set
= false, Op1Unset
= false,
5605 Op2Set
= false, Op2Unset
= false,
5608 unsigned Opcode
= MachineNode
->getMachineOpcode();
5619 SDValue Op
= MachineNode
->getOperand(1);
5620 if (Op
.isMachineOpcode()) {
5621 if (Op
.getMachineOpcode() == PPC::CRSET
)
5623 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
5625 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
5626 Op
.getOperand(0) == Op
.getOperand(1))
5633 case PPC::SELECT_I4
:
5634 case PPC::SELECT_I8
:
5635 case PPC::SELECT_F4
:
5636 case PPC::SELECT_F8
:
5637 case PPC::SELECT_QFRC
:
5638 case PPC::SELECT_QSRC
:
5639 case PPC::SELECT_QBRC
:
5640 case PPC::SELECT_SPE
:
5641 case PPC::SELECT_SPE4
:
5642 case PPC::SELECT_VRRC
:
5643 case PPC::SELECT_VSFRC
:
5644 case PPC::SELECT_VSSRC
:
5645 case PPC::SELECT_VSRC
: {
5646 SDValue Op
= MachineNode
->getOperand(0);
5647 if (Op
.isMachineOpcode()) {
5648 if (Op
.getMachineOpcode() == PPC::CRSET
)
5650 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
5652 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
5653 Op
.getOperand(0) == Op
.getOperand(1))
5660 bool SelectSwap
= false;
5664 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5666 ResNode
= MachineNode
->getOperand(0).getNode();
5669 ResNode
= MachineNode
->getOperand(1).getNode();
5672 ResNode
= MachineNode
->getOperand(0).getNode();
5673 else if (Op1Unset
|| Op2Unset
)
5674 // x & 0 = 0 & y = 0
5675 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5678 // ~x & y = andc(y, x)
5679 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5680 MVT::i1
, MachineNode
->getOperand(1),
5681 MachineNode
->getOperand(0).
5684 // x & ~y = andc(x, y)
5685 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5686 MVT::i1
, MachineNode
->getOperand(0),
5687 MachineNode
->getOperand(1).
5689 else if (AllUsersSelectZero(MachineNode
)) {
5690 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
5691 MVT::i1
, MachineNode
->getOperand(0),
5692 MachineNode
->getOperand(1));
5697 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5698 // nand(x, x) -> nor(x, x)
5699 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5700 MVT::i1
, MachineNode
->getOperand(0),
5701 MachineNode
->getOperand(0));
5703 // nand(1, y) -> nor(y, y)
5704 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5705 MVT::i1
, MachineNode
->getOperand(1),
5706 MachineNode
->getOperand(1));
5708 // nand(x, 1) -> nor(x, x)
5709 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5710 MVT::i1
, MachineNode
->getOperand(0),
5711 MachineNode
->getOperand(0));
5712 else if (Op1Unset
|| Op2Unset
)
5713 // nand(x, 0) = nand(0, y) = 1
5714 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5717 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5718 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5719 MVT::i1
, MachineNode
->getOperand(0).
5721 MachineNode
->getOperand(1));
5723 // nand(x, ~y) = ~x | y = orc(y, x)
5724 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5725 MVT::i1
, MachineNode
->getOperand(1).
5727 MachineNode
->getOperand(0));
5728 else if (AllUsersSelectZero(MachineNode
)) {
5729 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
5730 MVT::i1
, MachineNode
->getOperand(0),
5731 MachineNode
->getOperand(1));
5736 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5738 ResNode
= MachineNode
->getOperand(0).getNode();
5739 else if (Op1Set
|| Op2Set
)
5740 // x | 1 = 1 | y = 1
5741 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5745 ResNode
= MachineNode
->getOperand(1).getNode();
5748 ResNode
= MachineNode
->getOperand(0).getNode();
5750 // ~x | y = orc(y, x)
5751 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5752 MVT::i1
, MachineNode
->getOperand(1),
5753 MachineNode
->getOperand(0).
5756 // x | ~y = orc(x, y)
5757 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5758 MVT::i1
, MachineNode
->getOperand(0),
5759 MachineNode
->getOperand(1).
5761 else if (AllUsersSelectZero(MachineNode
)) {
5762 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5763 MVT::i1
, MachineNode
->getOperand(0),
5764 MachineNode
->getOperand(1));
5769 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5771 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5774 // xor(1, y) -> nor(y, y)
5775 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5776 MVT::i1
, MachineNode
->getOperand(1),
5777 MachineNode
->getOperand(1));
5779 // xor(x, 1) -> nor(x, x)
5780 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5781 MVT::i1
, MachineNode
->getOperand(0),
5782 MachineNode
->getOperand(0));
5785 ResNode
= MachineNode
->getOperand(1).getNode();
5788 ResNode
= MachineNode
->getOperand(0).getNode();
5790 // xor(~x, y) = eqv(x, y)
5791 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5792 MVT::i1
, MachineNode
->getOperand(0).
5794 MachineNode
->getOperand(1));
5796 // xor(x, ~y) = eqv(x, y)
5797 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5798 MVT::i1
, MachineNode
->getOperand(0),
5799 MachineNode
->getOperand(1).
5801 else if (AllUsersSelectZero(MachineNode
)) {
5802 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
5803 MVT::i1
, MachineNode
->getOperand(0),
5804 MachineNode
->getOperand(1));
5809 if (Op1Set
|| Op2Set
)
5811 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5814 // nor(0, y) = ~y -> nor(y, y)
5815 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5816 MVT::i1
, MachineNode
->getOperand(1),
5817 MachineNode
->getOperand(1));
5820 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5821 MVT::i1
, MachineNode
->getOperand(0),
5822 MachineNode
->getOperand(0));
5824 // nor(~x, y) = andc(x, y)
5825 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5826 MVT::i1
, MachineNode
->getOperand(0).
5828 MachineNode
->getOperand(1));
5830 // nor(x, ~y) = andc(y, x)
5831 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5832 MVT::i1
, MachineNode
->getOperand(1).
5834 MachineNode
->getOperand(0));
5835 else if (AllUsersSelectZero(MachineNode
)) {
5836 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
5837 MVT::i1
, MachineNode
->getOperand(0),
5838 MachineNode
->getOperand(1));
5843 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5845 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5849 ResNode
= MachineNode
->getOperand(1).getNode();
5852 ResNode
= MachineNode
->getOperand(0).getNode();
5854 // eqv(0, y) = ~y -> nor(y, y)
5855 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5856 MVT::i1
, MachineNode
->getOperand(1),
5857 MachineNode
->getOperand(1));
5860 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5861 MVT::i1
, MachineNode
->getOperand(0),
5862 MachineNode
->getOperand(0));
5864 // eqv(~x, y) = xor(x, y)
5865 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5866 MVT::i1
, MachineNode
->getOperand(0).
5868 MachineNode
->getOperand(1));
5870 // eqv(x, ~y) = xor(x, y)
5871 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5872 MVT::i1
, MachineNode
->getOperand(0),
5873 MachineNode
->getOperand(1).
5875 else if (AllUsersSelectZero(MachineNode
)) {
5876 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
5877 MVT::i1
, MachineNode
->getOperand(0),
5878 MachineNode
->getOperand(1));
5883 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5885 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5889 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5890 MVT::i1
, MachineNode
->getOperand(1),
5891 MachineNode
->getOperand(1));
5892 else if (Op1Unset
|| Op2Set
)
5893 // andc(0, y) = andc(x, 1) = 0
5894 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
5898 ResNode
= MachineNode
->getOperand(0).getNode();
5900 // andc(~x, y) = ~(x | y) = nor(x, y)
5901 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5902 MVT::i1
, MachineNode
->getOperand(0).
5904 MachineNode
->getOperand(1));
5906 // andc(x, ~y) = x & y
5907 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
5908 MVT::i1
, MachineNode
->getOperand(0),
5909 MachineNode
->getOperand(1).
5911 else if (AllUsersSelectZero(MachineNode
)) {
5912 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
5913 MVT::i1
, MachineNode
->getOperand(1),
5914 MachineNode
->getOperand(0));
5919 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
5921 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5923 else if (Op1Set
|| Op2Unset
)
5924 // orc(1, y) = orc(x, 0) = 1
5925 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
5929 ResNode
= MachineNode
->getOperand(0).getNode();
5932 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
5933 MVT::i1
, MachineNode
->getOperand(1),
5934 MachineNode
->getOperand(1));
5936 // orc(~x, y) = ~(x & y) = nand(x, y)
5937 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
5938 MVT::i1
, MachineNode
->getOperand(0).
5940 MachineNode
->getOperand(1));
5942 // orc(x, ~y) = x | y
5943 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
5944 MVT::i1
, MachineNode
->getOperand(0),
5945 MachineNode
->getOperand(1).
5947 else if (AllUsersSelectZero(MachineNode
)) {
5948 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
5949 MVT::i1
, MachineNode
->getOperand(1),
5950 MachineNode
->getOperand(0));
5954 case PPC::SELECT_I4
:
5955 case PPC::SELECT_I8
:
5956 case PPC::SELECT_F4
:
5957 case PPC::SELECT_F8
:
5958 case PPC::SELECT_QFRC
:
5959 case PPC::SELECT_QSRC
:
5960 case PPC::SELECT_QBRC
:
5961 case PPC::SELECT_SPE
:
5962 case PPC::SELECT_SPE4
:
5963 case PPC::SELECT_VRRC
:
5964 case PPC::SELECT_VSFRC
:
5965 case PPC::SELECT_VSSRC
:
5966 case PPC::SELECT_VSRC
:
5968 ResNode
= MachineNode
->getOperand(1).getNode();
5970 ResNode
= MachineNode
->getOperand(2).getNode();
5972 ResNode
= CurDAG
->getMachineNode(MachineNode
->getMachineOpcode(),
5974 MachineNode
->getValueType(0),
5975 MachineNode
->getOperand(0).
5977 MachineNode
->getOperand(2),
5978 MachineNode
->getOperand(1));
5983 ResNode
= CurDAG
->getMachineNode(Opcode
== PPC::BC
? PPC::BCn
:
5987 MachineNode
->getOperand(0).
5989 MachineNode
->getOperand(1),
5990 MachineNode
->getOperand(2));
5991 // FIXME: Handle Op1Set, Op1Unset here too.
5995 // If we're inverting this node because it is used only by selects that
5996 // we'd like to swap, then swap the selects before the node replacement.
5998 SwapAllSelectUsers(MachineNode
);
6000 if (ResNode
!= MachineNode
) {
6001 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6002 LLVM_DEBUG(MachineNode
->dump(CurDAG
));
6003 LLVM_DEBUG(dbgs() << "\nNew: ");
6004 LLVM_DEBUG(ResNode
->dump(CurDAG
));
6005 LLVM_DEBUG(dbgs() << "\n");
6007 ReplaceUses(MachineNode
, ResNode
);
6012 CurDAG
->RemoveDeadNodes();
6013 } while (IsModified
);
6016 // Gather the set of 32-bit operations that are known to have their
6017 // higher-order 32 bits zero, where ToPromote contains all such operations.
6018 static bool PeepholePPC64ZExtGather(SDValue Op32
,
6019 SmallPtrSetImpl
<SDNode
*> &ToPromote
) {
6020 if (!Op32
.isMachineOpcode())
6023 // First, check for the "frontier" instructions (those that will clear the
6024 // higher-order 32 bits.
6026 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
6027 // around. If it does not, then these instructions will clear the
6028 // higher-order bits.
6029 if ((Op32
.getMachineOpcode() == PPC::RLWINM
||
6030 Op32
.getMachineOpcode() == PPC::RLWNM
) &&
6031 Op32
.getConstantOperandVal(2) <= Op32
.getConstantOperandVal(3)) {
6032 ToPromote
.insert(Op32
.getNode());
6036 // SLW and SRW always clear the higher-order bits.
6037 if (Op32
.getMachineOpcode() == PPC::SLW
||
6038 Op32
.getMachineOpcode() == PPC::SRW
) {
6039 ToPromote
.insert(Op32
.getNode());
6043 // For LI and LIS, we need the immediate to be positive (so that it is not
6045 if (Op32
.getMachineOpcode() == PPC::LI
||
6046 Op32
.getMachineOpcode() == PPC::LIS
) {
6047 if (!isUInt
<15>(Op32
.getConstantOperandVal(0)))
6050 ToPromote
.insert(Op32
.getNode());
6054 // LHBRX and LWBRX always clear the higher-order bits.
6055 if (Op32
.getMachineOpcode() == PPC::LHBRX
||
6056 Op32
.getMachineOpcode() == PPC::LWBRX
) {
6057 ToPromote
.insert(Op32
.getNode());
6061 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
6062 if (Op32
.getMachineOpcode() == PPC::CNTLZW
||
6063 Op32
.getMachineOpcode() == PPC::CNTTZW
) {
6064 ToPromote
.insert(Op32
.getNode());
6068 // Next, check for those instructions we can look through.
6070 // Assuming the mask does not wrap around, then the higher-order bits are
6071 // taken directly from the first operand.
6072 if (Op32
.getMachineOpcode() == PPC::RLWIMI
&&
6073 Op32
.getConstantOperandVal(3) <= Op32
.getConstantOperandVal(4)) {
6074 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6075 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
6078 ToPromote
.insert(Op32
.getNode());
6079 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6083 // For OR, the higher-order bits are zero if that is true for both operands.
6084 // For SELECT_I4, the same is true (but the relevant operand numbers are
6086 if (Op32
.getMachineOpcode() == PPC::OR
||
6087 Op32
.getMachineOpcode() == PPC::SELECT_I4
) {
6088 unsigned B
= Op32
.getMachineOpcode() == PPC::SELECT_I4
? 1 : 0;
6089 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6090 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+0), ToPromote1
))
6092 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+1), ToPromote1
))
6095 ToPromote
.insert(Op32
.getNode());
6096 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6100 // For ORI and ORIS, we need the higher-order bits of the first operand to be
6101 // zero, and also for the constant to be positive (so that it is not sign
6103 if (Op32
.getMachineOpcode() == PPC::ORI
||
6104 Op32
.getMachineOpcode() == PPC::ORIS
) {
6105 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6106 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
6108 if (!isUInt
<15>(Op32
.getConstantOperandVal(1)))
6111 ToPromote
.insert(Op32
.getNode());
6112 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6116 // The higher-order bits of AND are zero if that is true for at least one of
6118 if (Op32
.getMachineOpcode() == PPC::AND
) {
6119 SmallPtrSet
<SDNode
*, 16> ToPromote1
, ToPromote2
;
6121 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
6123 PeepholePPC64ZExtGather(Op32
.getOperand(1), ToPromote2
);
6124 if (!Op0OK
&& !Op1OK
)
6127 ToPromote
.insert(Op32
.getNode());
6130 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6133 ToPromote
.insert(ToPromote2
.begin(), ToPromote2
.end());
6138 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
6139 // of the first operand, or if the second operand is positive (so that it is
6140 // not sign extended).
6141 if (Op32
.getMachineOpcode() == PPC::ANDIo
||
6142 Op32
.getMachineOpcode() == PPC::ANDISo
) {
6143 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6145 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
6146 bool Op1OK
= isUInt
<15>(Op32
.getConstantOperandVal(1));
6147 if (!Op0OK
&& !Op1OK
)
6150 ToPromote
.insert(Op32
.getNode());
6153 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6161 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
6162 if (!PPCSubTarget
->isPPC64())
6165 // When we zero-extend from i32 to i64, we use a pattern like this:
6166 // def : Pat<(i64 (zext i32:$in)),
6167 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
6169 // There are several 32-bit shift/rotate instructions, however, that will
6170 // clear the higher-order bits of their output, rendering the RLDICL
6171 // unnecessary. When that happens, we remove it here, and redefine the
6172 // relevant 32-bit operation to be a 64-bit operation.
6174 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
6176 bool MadeChange
= false;
6177 while (Position
!= CurDAG
->allnodes_begin()) {
6178 SDNode
*N
= &*--Position
;
6179 // Skip dead nodes and any non-machine opcodes.
6180 if (N
->use_empty() || !N
->isMachineOpcode())
6183 if (N
->getMachineOpcode() != PPC::RLDICL
)
6186 if (N
->getConstantOperandVal(1) != 0 ||
6187 N
->getConstantOperandVal(2) != 32)
6190 SDValue ISR
= N
->getOperand(0);
6191 if (!ISR
.isMachineOpcode() ||
6192 ISR
.getMachineOpcode() != TargetOpcode::INSERT_SUBREG
)
6195 if (!ISR
.hasOneUse())
6198 if (ISR
.getConstantOperandVal(2) != PPC::sub_32
)
6201 SDValue IDef
= ISR
.getOperand(0);
6202 if (!IDef
.isMachineOpcode() ||
6203 IDef
.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF
)
6206 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
6207 // can get rid of it.
6209 SDValue Op32
= ISR
->getOperand(1);
6210 if (!Op32
.isMachineOpcode())
6213 // There are some 32-bit instructions that always clear the high-order 32
6214 // bits, there are also some instructions (like AND) that we can look
6216 SmallPtrSet
<SDNode
*, 16> ToPromote
;
6217 if (!PeepholePPC64ZExtGather(Op32
, ToPromote
))
6220 // If the ToPromote set contains nodes that have uses outside of the set
6221 // (except for the original INSERT_SUBREG), then abort the transformation.
6222 bool OutsideUse
= false;
6223 for (SDNode
*PN
: ToPromote
) {
6224 for (SDNode
*UN
: PN
->uses()) {
6225 if (!ToPromote
.count(UN
) && UN
!= ISR
.getNode()) {
6239 // We now know that this zero extension can be removed by promoting to
6240 // nodes in ToPromote to 64-bit operations, where for operations in the
6241 // frontier of the set, we need to insert INSERT_SUBREGs for their
6243 for (SDNode
*PN
: ToPromote
) {
6245 switch (PN
->getMachineOpcode()) {
6247 llvm_unreachable("Don't know the 64-bit variant of this instruction");
6248 case PPC::RLWINM
: NewOpcode
= PPC::RLWINM8
; break;
6249 case PPC::RLWNM
: NewOpcode
= PPC::RLWNM8
; break;
6250 case PPC::SLW
: NewOpcode
= PPC::SLW8
; break;
6251 case PPC::SRW
: NewOpcode
= PPC::SRW8
; break;
6252 case PPC::LI
: NewOpcode
= PPC::LI8
; break;
6253 case PPC::LIS
: NewOpcode
= PPC::LIS8
; break;
6254 case PPC::LHBRX
: NewOpcode
= PPC::LHBRX8
; break;
6255 case PPC::LWBRX
: NewOpcode
= PPC::LWBRX8
; break;
6256 case PPC::CNTLZW
: NewOpcode
= PPC::CNTLZW8
; break;
6257 case PPC::CNTTZW
: NewOpcode
= PPC::CNTTZW8
; break;
6258 case PPC::RLWIMI
: NewOpcode
= PPC::RLWIMI8
; break;
6259 case PPC::OR
: NewOpcode
= PPC::OR8
; break;
6260 case PPC::SELECT_I4
: NewOpcode
= PPC::SELECT_I8
; break;
6261 case PPC::ORI
: NewOpcode
= PPC::ORI8
; break;
6262 case PPC::ORIS
: NewOpcode
= PPC::ORIS8
; break;
6263 case PPC::AND
: NewOpcode
= PPC::AND8
; break;
6264 case PPC::ANDIo
: NewOpcode
= PPC::ANDIo8
; break;
6265 case PPC::ANDISo
: NewOpcode
= PPC::ANDISo8
; break;
6268 // Note: During the replacement process, the nodes will be in an
6269 // inconsistent state (some instructions will have operands with values
6270 // of the wrong type). Once done, however, everything should be right
6273 SmallVector
<SDValue
, 4> Ops
;
6274 for (const SDValue
&V
: PN
->ops()) {
6275 if (!ToPromote
.count(V
.getNode()) && V
.getValueType() == MVT::i32
&&
6276 !isa
<ConstantSDNode
>(V
)) {
6277 SDValue ReplOpOps
[] = { ISR
.getOperand(0), V
, ISR
.getOperand(2) };
6279 CurDAG
->getMachineNode(TargetOpcode::INSERT_SUBREG
, SDLoc(V
),
6280 ISR
.getNode()->getVTList(), ReplOpOps
);
6281 Ops
.push_back(SDValue(ReplOp
, 0));
6287 // Because all to-be-promoted nodes only have users that are other
6288 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
6289 // the i32 result value type with i64.
6291 SmallVector
<EVT
, 2> NewVTs
;
6292 SDVTList VTs
= PN
->getVTList();
6293 for (unsigned i
= 0, ie
= VTs
.NumVTs
; i
!= ie
; ++i
)
6294 if (VTs
.VTs
[i
] == MVT::i32
)
6295 NewVTs
.push_back(MVT::i64
);
6297 NewVTs
.push_back(VTs
.VTs
[i
]);
6299 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
6300 LLVM_DEBUG(PN
->dump(CurDAG
));
6302 CurDAG
->SelectNodeTo(PN
, NewOpcode
, CurDAG
->getVTList(NewVTs
), Ops
);
6304 LLVM_DEBUG(dbgs() << "\nNew: ");
6305 LLVM_DEBUG(PN
->dump(CurDAG
));
6306 LLVM_DEBUG(dbgs() << "\n");
6309 // Now we replace the original zero extend and its associated INSERT_SUBREG
6310 // with the value feeding the INSERT_SUBREG (which has now been promoted to
6313 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
6314 LLVM_DEBUG(N
->dump(CurDAG
));
6315 LLVM_DEBUG(dbgs() << "\nNew: ");
6316 LLVM_DEBUG(Op32
.getNode()->dump(CurDAG
));
6317 LLVM_DEBUG(dbgs() << "\n");
6319 ReplaceUses(N
, Op32
.getNode());
6323 CurDAG
->RemoveDeadNodes();
6326 void PPCDAGToDAGISel::PeepholePPC64() {
6327 // These optimizations are currently supported only for 64-bit SVR4.
6328 if (PPCSubTarget
->isDarwin() || !PPCSubTarget
->isPPC64())
6331 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
6333 while (Position
!= CurDAG
->allnodes_begin()) {
6334 SDNode
*N
= &*--Position
;
6335 // Skip dead nodes and any non-machine opcodes.
6336 if (N
->use_empty() || !N
->isMachineOpcode())
6340 unsigned StorageOpcode
= N
->getMachineOpcode();
6341 bool RequiresMod4Offset
= false;
6343 switch (StorageOpcode
) {
6348 case PPC::DFLOADf64
:
6349 case PPC::DFLOADf32
:
6350 RequiresMod4Offset
= true;
6366 case PPC::DFSTOREf64
:
6367 case PPC::DFSTOREf32
:
6368 RequiresMod4Offset
= true;
6382 // If this is a load or store with a zero offset, or within the alignment,
6383 // we may be able to fold an add-immediate into the memory operation.
6384 // The check against alignment is below, as it can't occur until we check
6385 // the arguments to N
6386 if (!isa
<ConstantSDNode
>(N
->getOperand(FirstOp
)))
6389 SDValue Base
= N
->getOperand(FirstOp
+ 1);
6390 if (!Base
.isMachineOpcode())
6394 bool ReplaceFlags
= true;
6396 // When the feeding operation is an add-immediate of some sort,
6397 // determine whether we need to add relocation information to the
6398 // target flags on the immediate operand when we fold it into the
6399 // load instruction.
6401 // For something like ADDItocL, the relocation information is
6402 // inferred from the opcode; when we process it in the AsmPrinter,
6403 // we add the necessary relocation there. A load, though, can receive
6404 // relocation from various flavors of ADDIxxx, so we need to carry
6405 // the relocation information in the target flags.
6406 switch (Base
.getMachineOpcode()) {
6411 // In some cases (such as TLS) the relocation information
6412 // is already in place on the operand, so copying the operand
6414 ReplaceFlags
= false;
6415 // For these cases, the immediate may not be divisible by 4, in
6416 // which case the fold is illegal for DS-form instructions. (The
6417 // other cases provide aligned addresses and are always safe.)
6418 if (RequiresMod4Offset
&&
6419 (!isa
<ConstantSDNode
>(Base
.getOperand(1)) ||
6420 Base
.getConstantOperandVal(1) % 4 != 0))
6423 case PPC::ADDIdtprelL
:
6424 Flags
= PPCII::MO_DTPREL_LO
;
6426 case PPC::ADDItlsldL
:
6427 Flags
= PPCII::MO_TLSLD_LO
;
6430 Flags
= PPCII::MO_TOC_LO
;
6434 SDValue ImmOpnd
= Base
.getOperand(1);
6436 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
6437 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
6438 // we might have needed different @ha relocation values for the offset
6440 int MaxDisplacement
= 7;
6441 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
6442 const GlobalValue
*GV
= GA
->getGlobal();
6443 MaxDisplacement
= std::min((int) GV
->getAlignment() - 1, MaxDisplacement
);
6446 bool UpdateHBase
= false;
6447 SDValue HBase
= Base
.getOperand(0);
6449 int Offset
= N
->getConstantOperandVal(FirstOp
);
6451 if (Offset
< 0 || Offset
> MaxDisplacement
) {
6452 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
6453 // one use, then we can do this for any offset, we just need to also
6454 // update the offset (i.e. the symbol addend) on the addis also.
6455 if (Base
.getMachineOpcode() != PPC::ADDItocL
)
6458 if (!HBase
.isMachineOpcode() ||
6459 HBase
.getMachineOpcode() != PPC::ADDIStocHA
)
6462 if (!Base
.hasOneUse() || !HBase
.hasOneUse())
6465 SDValue HImmOpnd
= HBase
.getOperand(1);
6466 if (HImmOpnd
!= ImmOpnd
)
6472 // If we're directly folding the addend from an addi instruction, then:
6473 // 1. In general, the offset on the memory access must be zero.
6474 // 2. If the addend is a constant, then it can be combined with a
6475 // non-zero offset, but only if the result meets the encoding
6477 if (auto *C
= dyn_cast
<ConstantSDNode
>(ImmOpnd
)) {
6478 Offset
+= C
->getSExtValue();
6480 if (RequiresMod4Offset
&& (Offset
% 4) != 0)
6483 if (!isInt
<16>(Offset
))
6486 ImmOpnd
= CurDAG
->getTargetConstant(Offset
, SDLoc(ImmOpnd
),
6487 ImmOpnd
.getValueType());
6488 } else if (Offset
!= 0) {
6493 // We found an opportunity. Reverse the operands from the add
6494 // immediate and substitute them into the load or store. If
6495 // needed, update the target flags for the immediate operand to
6496 // reflect the necessary relocation information.
6497 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
6498 LLVM_DEBUG(Base
->dump(CurDAG
));
6499 LLVM_DEBUG(dbgs() << "\nN: ");
6500 LLVM_DEBUG(N
->dump(CurDAG
));
6501 LLVM_DEBUG(dbgs() << "\n");
6503 // If the relocation information isn't already present on the
6504 // immediate operand, add it now.
6506 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
6508 const GlobalValue
*GV
= GA
->getGlobal();
6509 // We can't perform this optimization for data whose alignment
6510 // is insufficient for the instruction encoding.
6511 if (GV
->getAlignment() < 4 &&
6512 (RequiresMod4Offset
|| (Offset
% 4) != 0)) {
6513 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
6516 ImmOpnd
= CurDAG
->getTargetGlobalAddress(GV
, dl
, MVT::i64
, Offset
, Flags
);
6517 } else if (ConstantPoolSDNode
*CP
=
6518 dyn_cast
<ConstantPoolSDNode
>(ImmOpnd
)) {
6519 const Constant
*C
= CP
->getConstVal();
6520 ImmOpnd
= CurDAG
->getTargetConstantPool(C
, MVT::i64
,
6526 if (FirstOp
== 1) // Store
6527 (void)CurDAG
->UpdateNodeOperands(N
, N
->getOperand(0), ImmOpnd
,
6528 Base
.getOperand(0), N
->getOperand(3));
6530 (void)CurDAG
->UpdateNodeOperands(N
, ImmOpnd
, Base
.getOperand(0),
6534 (void)CurDAG
->UpdateNodeOperands(HBase
.getNode(), HBase
.getOperand(0),
6537 // The add-immediate may now be dead, in which case remove it.
6538 if (Base
.getNode()->use_empty())
6539 CurDAG
->RemoveDeadNode(Base
.getNode());
6543 /// createPPCISelDag - This pass converts a legalized DAG into a
6544 /// PowerPC-specific DAG, ready for instruction scheduling.
6546 FunctionPass
*llvm::createPPCISelDag(PPCTargetMachine
&TM
,
6547 CodeGenOpt::Level OptLevel
) {
6548 return new PPCDAGToDAGISel(TM
, OptLevel
);