1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines a pattern matching instruction selector for PowerPC,
10 // converting from a legalized dag to a PPC dag.
12 //===----------------------------------------------------------------------===//
14 #include "MCTargetDesc/PPCMCTargetDesc.h"
15 #include "MCTargetDesc/PPCPredicates.h"
17 #include "PPCISelLowering.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCSubtarget.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/BranchProbabilityInfo.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/SelectionDAG.h"
35 #include "llvm/CodeGen/SelectionDAGISel.h"
36 #include "llvm/CodeGen/SelectionDAGNodes.h"
37 #include "llvm/CodeGen/TargetInstrInfo.h"
38 #include "llvm/CodeGen/TargetRegisterInfo.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalValue.h"
44 #include "llvm/IR/InlineAsm.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/IntrinsicsPowerPC.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorHandling.h"
54 #include "llvm/Support/KnownBits.h"
55 #include "llvm/Support/MachineValueType.h"
56 #include "llvm/Support/MathExtras.h"
57 #include "llvm/Support/raw_ostream.h"
70 #define DEBUG_TYPE "ppc-codegen"
72 STATISTIC(NumSextSetcc
,
73 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
74 STATISTIC(NumZextSetcc
,
75 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
76 STATISTIC(SignExtensionsAdded
,
77 "Number of sign extensions for compare inputs added.");
78 STATISTIC(ZeroExtensionsAdded
,
79 "Number of zero extensions for compare inputs added.");
80 STATISTIC(NumLogicOpsOnComparison
,
81 "Number of logical ops on i1 values calculated in GPR.");
82 STATISTIC(OmittedForNonExtendUses
,
83 "Number of compares not eliminated as they have non-extending uses.");
85 "Number of compares lowered to setb.");
87 // FIXME: Remove this once the bug has been fixed!
88 cl::opt
<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
89 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden
);
92 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
93 cl::desc("use aggressive ppc isel for bit permutations"),
95 static cl::opt
<bool> BPermRewriterNoMasking(
96 "ppc-bit-perm-rewriter-stress-rotates",
97 cl::desc("stress rotate selection in aggressive ppc isel for "
101 static cl::opt
<bool> EnableBranchHint(
102 "ppc-use-branch-hint", cl::init(true),
103 cl::desc("Enable static hinting of branches on ppc"),
106 static cl::opt
<bool> EnableTLSOpt(
107 "ppc-tls-opt", cl::init(true),
108 cl::desc("Enable tls optimization peephole"),
/// Selects which integer comparisons get GPR-only code. The values are the
/// choices accepted by the "ppc-gpr-icmps" command-line option; the per-value
/// notes below repeat that option's help strings. Enumerator order (and thus
/// numeric value) is kept as it was.
enum ICmpInGPRType {
  ICGPR_All,      // All possible int comparisons in GPRs.
  ICGPR_None,     // Do not modify integer comparisons.
  ICGPR_I32,      // Only i32 comparisons in GPRs.
  ICGPR_I64,      // Only i64 comparisons in GPRs.
  ICGPR_NonExtIn, // Only comparisons where inputs don't need [sz]ext.
  ICGPR_Zext,     // Only comparisons with zext result.
  ICGPR_Sext,     // Only comparisons with sext result.
  ICGPR_ZextI32,  // Only i32 comparisons with zext result.
  ICGPR_SextI32,  // Only i32 comparisons with sext result.
  ICGPR_ZextI64,  // Only i64 comparisons with zext result.
  ICGPR_SextI64   // Only i64 comparisons with sext result.
};
115 static cl::opt
<ICmpInGPRType
> CmpInGPR(
116 "ppc-gpr-icmps", cl::Hidden
, cl::init(ICGPR_All
),
117 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
118 cl::values(clEnumValN(ICGPR_None
, "none", "Do not modify integer comparisons."),
119 clEnumValN(ICGPR_All
, "all", "All possible int comparisons in GPRs."),
120 clEnumValN(ICGPR_I32
, "i32", "Only i32 comparisons in GPRs."),
121 clEnumValN(ICGPR_I64
, "i64", "Only i64 comparisons in GPRs."),
122 clEnumValN(ICGPR_NonExtIn
, "nonextin",
123 "Only comparisons where inputs don't need [sz]ext."),
124 clEnumValN(ICGPR_Zext
, "zext", "Only comparisons with zext result."),
125 clEnumValN(ICGPR_ZextI32
, "zexti32",
126 "Only i32 comparisons with zext result."),
127 clEnumValN(ICGPR_ZextI64
, "zexti64",
128 "Only i64 comparisons with zext result."),
129 clEnumValN(ICGPR_Sext
, "sext", "Only comparisons with sext result."),
130 clEnumValN(ICGPR_SextI32
, "sexti32",
131 "Only i32 comparisons with sext result."),
132 clEnumValN(ICGPR_SextI64
, "sexti64",
133 "Only i64 comparisons with sext result.")));
136 //===--------------------------------------------------------------------===//
137 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
138 /// instructions for SelectionDAG operations.
140 class PPCDAGToDAGISel
: public SelectionDAGISel
{
141 const PPCTargetMachine
&TM
;
142 const PPCSubtarget
*Subtarget
= nullptr;
143 const PPCTargetLowering
*PPCLowering
= nullptr;
144 unsigned GlobalBaseReg
= 0;
147 explicit PPCDAGToDAGISel(PPCTargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
148 : SelectionDAGISel(tm
, OptLevel
), TM(tm
) {}
150 bool runOnMachineFunction(MachineFunction
&MF
) override
{
151 // Make sure we re-emit a set of the global base reg if necessary
153 Subtarget
= &MF
.getSubtarget
<PPCSubtarget
>();
154 PPCLowering
= Subtarget
->getTargetLowering();
155 if (Subtarget
->hasROPProtect()) {
156 // Create a place on the stack for the ROP Protection Hash.
157 // The ROP Protection Hash will always be 8 bytes and aligned to 8
159 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
160 PPCFunctionInfo
*FI
= MF
.getInfo
<PPCFunctionInfo
>();
161 const int Result
= MFI
.CreateStackObject(8, Align(8), false);
162 FI
->setROPProtectionHashSaveIndex(Result
);
164 SelectionDAGISel::runOnMachineFunction(MF
);
169 void PreprocessISelDAG() override
;
170 void PostprocessISelDAG() override
;
172 /// getI16Imm - Return a target constant with the specified value, of type
174 inline SDValue
getI16Imm(unsigned Imm
, const SDLoc
&dl
) {
175 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i16
);
178 /// getI32Imm - Return a target constant with the specified value, of type
180 inline SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
181 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
184 /// getI64Imm - Return a target constant with the specified value, of type
186 inline SDValue
getI64Imm(uint64_t Imm
, const SDLoc
&dl
) {
187 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i64
);
190 /// getSmallIPtrImm - Return a target constant of pointer type.
191 inline SDValue
getSmallIPtrImm(unsigned Imm
, const SDLoc
&dl
) {
192 return CurDAG
->getTargetConstant(
193 Imm
, dl
, PPCLowering
->getPointerTy(CurDAG
->getDataLayout()));
196 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
197 /// rotate and mask opcode and mask operation.
198 static bool isRotateAndMask(SDNode
*N
, unsigned Mask
, bool isShiftMask
,
199 unsigned &SH
, unsigned &MB
, unsigned &ME
);
201 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
202 /// base register. Return the virtual register that holds this value.
203 SDNode
*getGlobalBaseReg();
205 void selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
= 0);
207 // Select - Convert the specified operand from a target-independent to a
208 // target-specific node if it hasn't already been changed.
209 void Select(SDNode
*N
) override
;
211 bool tryBitfieldInsert(SDNode
*N
);
212 bool tryBitPermutation(SDNode
*N
);
213 bool tryIntCompareInGPR(SDNode
*N
);
215 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
216 // an X-Form load instruction with the offset being a relocation coming from
217 // the PPCISD::ADD_TLS.
218 bool tryTLSXFormLoad(LoadSDNode
*N
);
219 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
220 // an X-Form store instruction with the offset being a relocation coming from
221 // the PPCISD::ADD_TLS.
222 bool tryTLSXFormStore(StoreSDNode
*N
);
223 /// SelectCC - Select a comparison of the specified values with the
224 /// specified condition code, returning the CR# of the expression.
225 SDValue
SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
226 const SDLoc
&dl
, SDValue Chain
= SDValue());
228 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
229 /// immediate field. Note that the operand at this point is already the
230 /// result of a prior SelectAddressRegImm call.
231 bool SelectAddrImmOffs(SDValue N
, SDValue
&Out
) const {
232 if (N
.getOpcode() == ISD::TargetConstant
||
233 N
.getOpcode() == ISD::TargetGlobalAddress
) {
241 /// SelectDSForm - Returns true if address N can be represented by the
242 /// addressing mode of DSForm instructions (a base register, plus a signed
243 /// 16-bit displacement that is a multiple of 4.
244 bool SelectDSForm(SDNode
*Parent
, SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
245 return PPCLowering
->SelectOptimalAddrMode(Parent
, N
, Disp
, Base
, *CurDAG
,
246 Align(4)) == PPC::AM_DSForm
;
249 /// SelectDQForm - Returns true if address N can be represented by the
250 /// addressing mode of DQForm instructions (a base register, plus a signed
251 /// 16-bit displacement that is a multiple of 16.
252 bool SelectDQForm(SDNode
*Parent
, SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
253 return PPCLowering
->SelectOptimalAddrMode(Parent
, N
, Disp
, Base
, *CurDAG
,
254 Align(16)) == PPC::AM_DQForm
;
257 /// SelectDForm - Returns true if address N can be represented by
258 /// the addressing mode of DForm instructions (a base register, plus a
259 /// signed 16-bit immediate.
260 bool SelectDForm(SDNode
*Parent
, SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
261 return PPCLowering
->SelectOptimalAddrMode(Parent
, N
, Disp
, Base
, *CurDAG
,
262 None
) == PPC::AM_DForm
;
265 /// SelectXForm - Returns true if address N can be represented by the
266 /// addressing mode of XForm instructions (an indexed [r+r] operation).
267 bool SelectXForm(SDNode
*Parent
, SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
268 return PPCLowering
->SelectOptimalAddrMode(Parent
, N
, Disp
, Base
, *CurDAG
,
269 None
) == PPC::AM_XForm
;
272 /// SelectForceXForm - Given the specified address, force it to be
273 /// represented as an indexed [r+r] operation (an XForm instruction).
274 bool SelectForceXForm(SDNode
*Parent
, SDValue N
, SDValue
&Disp
,
276 return PPCLowering
->SelectForceXFormMode(N
, Disp
, Base
, *CurDAG
) ==
280 /// SelectAddrIdx - Given the specified address, check to see if it can be
281 /// represented as an indexed [r+r] operation.
282 /// This is for xform instructions whose associated displacement form is D.
283 /// The last parameter \p 0 means associated D form has no requirment for 16
284 /// bit signed displacement.
285 /// Returns false if it can be represented by [r+imm], which are preferred.
286 bool SelectAddrIdx(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
287 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
, None
);
290 /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
291 /// represented as an indexed [r+r] operation.
292 /// This is for xform instructions whose associated displacement form is DS.
293 /// The last parameter \p 4 means associated DS form 16 bit signed
294 /// displacement must be a multiple of 4.
295 /// Returns false if it can be represented by [r+imm], which are preferred.
296 bool SelectAddrIdxX4(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
297 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
,
301 /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
302 /// represented as an indexed [r+r] operation.
303 /// This is for xform instructions whose associated displacement form is DQ.
304 /// The last parameter \p 16 means associated DQ form 16 bit signed
305 /// displacement must be a multiple of 16.
306 /// Returns false if it can be represented by [r+imm], which are preferred.
307 bool SelectAddrIdxX16(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
308 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
,
312 /// SelectAddrIdxOnly - Given the specified address, force it to be
313 /// represented as an indexed [r+r] operation.
314 bool SelectAddrIdxOnly(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
315 return PPCLowering
->SelectAddressRegRegOnly(N
, Base
, Index
, *CurDAG
);
318 /// SelectAddrImm - Returns true if the address N can be represented by
319 /// a base register plus a signed 16-bit displacement [r+imm].
320 /// The last parameter \p 0 means D form has no requirement for 16 bit signed
322 bool SelectAddrImm(SDValue N
, SDValue
&Disp
,
324 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, None
);
327 /// SelectAddrImmX4 - Returns true if the address N can be represented by
328 /// a base register plus a signed 16-bit displacement that is a multiple of
329 /// 4 (last parameter). Suitable for use by STD and friends.
330 bool SelectAddrImmX4(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
331 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, Align(4));
334 /// SelectAddrImmX16 - Returns true if the address N can be represented by
335 /// a base register plus a signed 16-bit displacement that is a multiple of
336 /// 16(last parameter). Suitable for use by STXV and friends.
337 bool SelectAddrImmX16(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
338 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
,
342 /// SelectAddrImmX34 - Returns true if the address N can be represented by
343 /// a base register plus a signed 34-bit displacement. Suitable for use by
344 /// PSTXVP and friends.
345 bool SelectAddrImmX34(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
346 return PPCLowering
->SelectAddressRegImm34(N
, Disp
, Base
, *CurDAG
);
349 // Select an address into a single register.
350 bool SelectAddr(SDValue N
, SDValue
&Base
) {
355 bool SelectAddrPCRel(SDValue N
, SDValue
&Base
) {
356 return PPCLowering
->SelectAddressPCRel(N
, Base
);
359 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
360 /// inline asm expressions. It is always correct to compute the value into
361 /// a register. The case of adding a (possibly relocatable) constant to a
362 /// register can be improved, but it is wrong to substitute Reg+Reg for
363 /// Reg in an asm, because the load or store opcode would have to change.
364 bool SelectInlineAsmMemoryOperand(const SDValue
&Op
,
365 unsigned ConstraintID
,
366 std::vector
<SDValue
> &OutOps
) override
{
367 switch(ConstraintID
) {
369 errs() << "ConstraintID: " << ConstraintID
<< "\n";
370 llvm_unreachable("Unexpected asm memory constraint");
371 case InlineAsm::Constraint_es
:
372 case InlineAsm::Constraint_m
:
373 case InlineAsm::Constraint_o
:
374 case InlineAsm::Constraint_Q
:
375 case InlineAsm::Constraint_Z
:
376 case InlineAsm::Constraint_Zy
:
377 // We need to make sure that this one operand does not end up in r0
378 // (because we might end up lowering this as 0(%op)).
379 const TargetRegisterInfo
*TRI
= Subtarget
->getRegisterInfo();
380 const TargetRegisterClass
*TRC
= TRI
->getPointerRegClass(*MF
, /*Kind=*/1);
382 SDValue RC
= CurDAG
->getTargetConstant(TRC
->getID(), dl
, MVT::i32
);
384 SDValue(CurDAG
->getMachineNode(TargetOpcode::COPY_TO_REGCLASS
,
385 dl
, Op
.getValueType(),
388 OutOps
.push_back(NewOp
);
394 StringRef
getPassName() const override
{
395 return "PowerPC DAG->DAG Pattern Instruction Selection";
398 // Include the pieces autogenerated from the target description.
399 #include "PPCGenDAGISel.inc"
402 bool trySETCC(SDNode
*N
);
403 bool tryFoldSWTestBRCC(SDNode
*N
);
404 bool tryAsSingleRLDICL(SDNode
*N
);
405 bool tryAsSingleRLDICR(SDNode
*N
);
406 bool tryAsSingleRLWINM(SDNode
*N
);
407 bool tryAsSingleRLWINM8(SDNode
*N
);
408 bool tryAsSingleRLWIMI(SDNode
*N
);
409 bool tryAsPairOfRLDICL(SDNode
*N
);
410 bool tryAsSingleRLDIMI(SDNode
*N
);
412 void PeepholePPC64();
413 void PeepholePPC64ZExt();
414 void PeepholeCROps();
416 SDValue
combineToCMPB(SDNode
*N
);
417 void foldBoolExts(SDValue
&Res
, SDNode
*&N
);
419 bool AllUsersSelectZero(SDNode
*N
);
420 void SwapAllSelectUsers(SDNode
*N
);
422 bool isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const;
423 void transferMemOperands(SDNode
*N
, SDNode
*Result
);
426 } // end anonymous namespace
428 /// getGlobalBaseReg - Output the instructions required to put the
429 /// base address to use for accessing globals into a register.
431 SDNode
*PPCDAGToDAGISel::getGlobalBaseReg() {
432 if (!GlobalBaseReg
) {
433 const TargetInstrInfo
&TII
= *Subtarget
->getInstrInfo();
434 // Insert the set of GlobalBaseReg into the first MBB of the function
435 MachineBasicBlock
&FirstMBB
= MF
->front();
436 MachineBasicBlock::iterator MBBI
= FirstMBB
.begin();
437 const Module
*M
= MF
->getFunction().getParent();
440 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) == MVT::i32
) {
441 if (Subtarget
->isTargetELF()) {
442 GlobalBaseReg
= PPC::R30
;
443 if (!Subtarget
->isSecurePlt() &&
444 M
->getPICLevel() == PICLevel::SmallPIC
) {
445 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MoveGOTtoLR
));
446 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
447 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
449 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
450 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
451 Register TempReg
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
452 BuildMI(FirstMBB
, MBBI
, dl
,
453 TII
.get(PPC::UpdateGBR
), GlobalBaseReg
)
454 .addReg(TempReg
, RegState::Define
).addReg(GlobalBaseReg
);
455 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
459 RegInfo
->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass
);
460 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
461 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
464 // We must ensure that this sequence is dominated by the prologue.
465 // FIXME: This is a bit of a big hammer since we don't get the benefits
466 // of shrink-wrapping whenever we emit this instruction. Considering
467 // this is used in any function where we emit a jump table, this may be
468 // a significant limitation. We should consider inserting this in the
469 // block where it is used and then commoning this sequence up if it
470 // appears in multiple places.
471 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
473 MF
->getInfo
<PPCFunctionInfo
>()->setShrinkWrapDisabled(true);
474 GlobalBaseReg
= RegInfo
->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass
);
475 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR8
));
476 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR8
), GlobalBaseReg
);
479 return CurDAG
->getRegister(GlobalBaseReg
,
480 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()))
484 // Check if a SDValue has the toc-data attribute.
485 static bool hasTocDataAttr(SDValue Val
, unsigned PointerSize
) {
486 GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Val
);
490 const GlobalVariable
*GV
= dyn_cast_or_null
<GlobalVariable
>(GA
->getGlobal());
494 if (!GV
->hasAttribute("toc-data"))
497 // TODO: These asserts should be updated as more support for the toc data
498 // transformation is added (64 bit, struct support, etc.).
500 assert(PointerSize
== 4 && "Only 32 Bit Codegen is currently supported by "
501 "the toc data transformation.");
503 assert(PointerSize
>= GV
->getAlign().valueOrOne().value() &&
504 "GlobalVariables with an alignment requirement stricter then 4-bytes "
505 "not supported by the toc data transformation.");
507 Type
*GVType
= GV
->getValueType();
509 assert(GVType
->isSized() && "A GlobalVariable's size must be known to be "
510 "supported by the toc data transformation.");
512 if (GVType
->isVectorTy())
513 report_fatal_error("A GlobalVariable of Vector type is not currently "
514 "supported by the toc data transformation.");
516 if (GVType
->isArrayTy())
517 report_fatal_error("A GlobalVariable of Array type is not currently "
518 "supported by the toc data transformation.");
520 if (GVType
->isStructTy())
521 report_fatal_error("A GlobalVariable of Struct type is not currently "
522 "supported by the toc data transformation.");
524 assert(GVType
->getPrimitiveSizeInBits() <= PointerSize
* 8 &&
525 "A GlobalVariable with size larger than 32 bits is not currently "
526 "supported by the toc data transformation.");
528 if (GV
->hasLocalLinkage() || GV
->hasPrivateLinkage())
529 report_fatal_error("A GlobalVariable with private or local linkage is not "
530 "currently supported by the toc data transformation.");
532 assert(!GV
->hasCommonLinkage() &&
533 "Tentative definitions cannot have the mapping class XMC_TD.");
538 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
539 /// operand. If so Imm will receive the 32-bit value.
540 static bool isInt32Immediate(SDNode
*N
, unsigned &Imm
) {
541 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i32
) {
542 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
548 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
549 /// operand. If so Imm will receive the 64-bit value.
550 static bool isInt64Immediate(SDNode
*N
, uint64_t &Imm
) {
551 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i64
) {
552 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
558 // isInt32Immediate - This method tests to see if a constant operand.
559 // If so Imm will receive the 32 bit value.
560 static bool isInt32Immediate(SDValue N
, unsigned &Imm
) {
561 return isInt32Immediate(N
.getNode(), Imm
);
564 /// isInt64Immediate - This method tests to see if the value is a 64-bit
565 /// constant operand. If so Imm will receive the 64-bit value.
566 static bool isInt64Immediate(SDValue N
, uint64_t &Imm
) {
567 return isInt64Immediate(N
.getNode(), Imm
);
570 static unsigned getBranchHint(unsigned PCC
,
571 const FunctionLoweringInfo
&FuncInfo
,
572 const SDValue
&DestMBB
) {
573 assert(isa
<BasicBlockSDNode
>(DestMBB
));
575 if (!FuncInfo
.BPI
) return PPC::BR_NO_HINT
;
577 const BasicBlock
*BB
= FuncInfo
.MBB
->getBasicBlock();
578 const Instruction
*BBTerm
= BB
->getTerminator();
580 if (BBTerm
->getNumSuccessors() != 2) return PPC::BR_NO_HINT
;
582 const BasicBlock
*TBB
= BBTerm
->getSuccessor(0);
583 const BasicBlock
*FBB
= BBTerm
->getSuccessor(1);
585 auto TProb
= FuncInfo
.BPI
->getEdgeProbability(BB
, TBB
);
586 auto FProb
= FuncInfo
.BPI
->getEdgeProbability(BB
, FBB
);
588 // We only want to handle cases which are easy to predict at static time, e.g.
589 // C++ throw statement, that is very likely not taken, or calling never
590 // returned function, e.g. stdlib exit(). So we set Threshold to filter
593 // Below is LLVM branch weight table, we only want to handle case 1, 2
595 // Case Taken:Nontaken Example
596 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
597 // 2. Invoke-terminating 1:1048575
598 // 3. Coldblock 4:64 __builtin_expect
599 // 4. Loop Branch 124:4 For loop
600 // 5. PH/ZH/FPH 20:12
601 const uint32_t Threshold
= 10000;
603 if (std::max(TProb
, FProb
) / Threshold
< std::min(TProb
, FProb
))
604 return PPC::BR_NO_HINT
;
606 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo
.Fn
->getName()
607 << "::" << BB
->getName() << "'\n"
608 << " -> " << TBB
->getName() << ": " << TProb
<< "\n"
609 << " -> " << FBB
->getName() << ": " << FProb
<< "\n");
611 const BasicBlockSDNode
*BBDN
= cast
<BasicBlockSDNode
>(DestMBB
);
613 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
614 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
615 if (BBDN
->getBasicBlock()->getBasicBlock() != TBB
)
616 std::swap(TProb
, FProb
);
618 return (TProb
> FProb
) ? PPC::BR_TAKEN_HINT
: PPC::BR_NONTAKEN_HINT
;
621 // isOpcWithIntImmediate - This method tests to see if the node is a specific
622 // opcode and that it has a immediate integer right operand.
623 // If so Imm will receive the 32 bit value.
624 static bool isOpcWithIntImmediate(SDNode
*N
, unsigned Opc
, unsigned& Imm
) {
625 return N
->getOpcode() == Opc
626 && isInt32Immediate(N
->getOperand(1).getNode(), Imm
);
629 void PPCDAGToDAGISel::selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
) {
631 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
632 SDValue TFI
= CurDAG
->getTargetFrameIndex(FI
, N
->getValueType(0));
633 unsigned Opc
= N
->getValueType(0) == MVT::i32
? PPC::ADDI
: PPC::ADDI8
;
635 CurDAG
->SelectNodeTo(SN
, Opc
, N
->getValueType(0), TFI
,
636 getSmallIPtrImm(Offset
, dl
));
638 ReplaceNode(SN
, CurDAG
->getMachineNode(Opc
, dl
, N
->getValueType(0), TFI
,
639 getSmallIPtrImm(Offset
, dl
)));
642 bool PPCDAGToDAGISel::isRotateAndMask(SDNode
*N
, unsigned Mask
,
643 bool isShiftMask
, unsigned &SH
,
644 unsigned &MB
, unsigned &ME
) {
645 // Don't even go down this path for i64, since different logic will be
646 // necessary for rldicl/rldicr/rldimi.
647 if (N
->getValueType(0) != MVT::i32
)
651 unsigned Indeterminant
= ~0; // bit mask marking indeterminant results
652 unsigned Opcode
= N
->getOpcode();
653 if (N
->getNumOperands() != 2 ||
654 !isInt32Immediate(N
->getOperand(1).getNode(), Shift
) || (Shift
> 31))
657 if (Opcode
== ISD::SHL
) {
658 // apply shift left to mask if it comes first
659 if (isShiftMask
) Mask
= Mask
<< Shift
;
660 // determine which bits are made indeterminant by shift
661 Indeterminant
= ~(0xFFFFFFFFu
<< Shift
);
662 } else if (Opcode
== ISD::SRL
) {
663 // apply shift right to mask if it comes first
664 if (isShiftMask
) Mask
= Mask
>> Shift
;
665 // determine which bits are made indeterminant by shift
666 Indeterminant
= ~(0xFFFFFFFFu
>> Shift
);
667 // adjust for the left rotate
669 } else if (Opcode
== ISD::ROTL
) {
675 // if the mask doesn't intersect any Indeterminant bits
676 if (Mask
&& !(Mask
& Indeterminant
)) {
678 // make sure the mask is still a mask (wrap arounds may not be)
679 return isRunOfOnes(Mask
, MB
, ME
);
684 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode
*ST
) {
685 SDValue Base
= ST
->getBasePtr();
686 if (Base
.getOpcode() != PPCISD::ADD_TLS
)
688 SDValue Offset
= ST
->getOffset();
689 if (!Offset
.isUndef())
691 if (Base
.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR
)
695 EVT MemVT
= ST
->getMemoryVT();
696 EVT RegVT
= ST
->getValue().getValueType();
699 switch (MemVT
.getSimpleVT().SimpleTy
) {
703 Opcode
= (RegVT
== MVT::i32
) ? PPC::STBXTLS_32
: PPC::STBXTLS
;
707 Opcode
= (RegVT
== MVT::i32
) ? PPC::STHXTLS_32
: PPC::STHXTLS
;
711 Opcode
= (RegVT
== MVT::i32
) ? PPC::STWXTLS_32
: PPC::STWXTLS
;
715 Opcode
= PPC::STDXTLS
;
719 SDValue Chain
= ST
->getChain();
720 SDVTList VTs
= ST
->getVTList();
721 SDValue Ops
[] = {ST
->getValue(), Base
.getOperand(0), Base
.getOperand(1),
723 SDNode
*MN
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
724 transferMemOperands(ST
, MN
);
729 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode
*LD
) {
730 SDValue Base
= LD
->getBasePtr();
731 if (Base
.getOpcode() != PPCISD::ADD_TLS
)
733 SDValue Offset
= LD
->getOffset();
734 if (!Offset
.isUndef())
736 if (Base
.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR
)
740 EVT MemVT
= LD
->getMemoryVT();
741 EVT RegVT
= LD
->getValueType(0);
743 switch (MemVT
.getSimpleVT().SimpleTy
) {
747 Opcode
= (RegVT
== MVT::i32
) ? PPC::LBZXTLS_32
: PPC::LBZXTLS
;
751 Opcode
= (RegVT
== MVT::i32
) ? PPC::LHZXTLS_32
: PPC::LHZXTLS
;
755 Opcode
= (RegVT
== MVT::i32
) ? PPC::LWZXTLS_32
: PPC::LWZXTLS
;
759 Opcode
= PPC::LDXTLS
;
763 SDValue Chain
= LD
->getChain();
764 SDVTList VTs
= LD
->getVTList();
765 SDValue Ops
[] = {Base
.getOperand(0), Base
.getOperand(1), Chain
};
766 SDNode
*MN
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
767 transferMemOperands(LD
, MN
);
772 /// Turn an or of two masked values into the rotate left word immediate then
773 /// mask insert (rlwimi) instruction.
774 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode
*N
) {
775 SDValue Op0
= N
->getOperand(0);
776 SDValue Op1
= N
->getOperand(1);
779 KnownBits LKnown
= CurDAG
->computeKnownBits(Op0
);
780 KnownBits RKnown
= CurDAG
->computeKnownBits(Op1
);
782 unsigned TargetMask
= LKnown
.Zero
.getZExtValue();
783 unsigned InsertMask
= RKnown
.Zero
.getZExtValue();
785 if ((TargetMask
| InsertMask
) == 0xFFFFFFFF) {
786 unsigned Op0Opc
= Op0
.getOpcode();
787 unsigned Op1Opc
= Op1
.getOpcode();
788 unsigned Value
, SH
= 0;
789 TargetMask
= ~TargetMask
;
790 InsertMask
= ~InsertMask
;
792 // If the LHS has a foldable shift and the RHS does not, then swap it to the
793 // RHS so that we can fold the shift into the insert.
794 if (Op0Opc
== ISD::AND
&& Op1Opc
== ISD::AND
) {
795 if (Op0
.getOperand(0).getOpcode() == ISD::SHL
||
796 Op0
.getOperand(0).getOpcode() == ISD::SRL
) {
797 if (Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
798 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
800 std::swap(Op0Opc
, Op1Opc
);
801 std::swap(TargetMask
, InsertMask
);
804 } else if (Op0Opc
== ISD::SHL
|| Op0Opc
== ISD::SRL
) {
805 if (Op1Opc
== ISD::AND
&& Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
806 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
808 std::swap(Op0Opc
, Op1Opc
);
809 std::swap(TargetMask
, InsertMask
);
814 if (isRunOfOnes(InsertMask
, MB
, ME
)) {
815 if ((Op1Opc
== ISD::SHL
|| Op1Opc
== ISD::SRL
) &&
816 isInt32Immediate(Op1
.getOperand(1), Value
)) {
817 Op1
= Op1
.getOperand(0);
818 SH
= (Op1Opc
== ISD::SHL
) ? Value
: 32 - Value
;
820 if (Op1Opc
== ISD::AND
) {
821 // The AND mask might not be a constant, and we need to make sure that
822 // if we're going to fold the masking with the insert, all bits not
823 // known to be zero in the mask are known to be one.
824 KnownBits MKnown
= CurDAG
->computeKnownBits(Op1
.getOperand(1));
825 bool CanFoldMask
= InsertMask
== MKnown
.One
.getZExtValue();
827 unsigned SHOpc
= Op1
.getOperand(0).getOpcode();
828 if ((SHOpc
== ISD::SHL
|| SHOpc
== ISD::SRL
) && CanFoldMask
&&
829 isInt32Immediate(Op1
.getOperand(0).getOperand(1), Value
)) {
830 // Note that Value must be in range here (less than 32) because
831 // otherwise there would not be any bits set in InsertMask.
832 Op1
= Op1
.getOperand(0).getOperand(0);
833 SH
= (SHOpc
== ISD::SHL
) ? Value
: 32 - Value
;
838 SDValue Ops
[] = { Op0
, Op1
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
840 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
// Scans every use of N and accumulates, in MaxTruncation, the widest bit width
// that any visible use keeps: the result width for the first kind of use, the
// memory width for a StoreSDNode use, and 32, 16 or 8 bits for the remaining
// kinds. The accumulated maximum is returned; the SDNode overload of
// selectI64Imm in this file treats a non-zero result as the width from which
// the constant may be sign-extended.
// NOTE(review): the opcode dispatch and the statements guarded by the `if`s
// below are not visible in this excerpt; presumably they bail out (returning
// 0) for uses that do not truncate -- confirm against the full source.
847 static unsigned allUsesTruncate(SelectionDAG
*CurDAG
, SDNode
*N
) {
848 unsigned MaxTruncation
= 0;
849 // Cannot use range-based for loop here as we need the actual use (i.e. we
850 // need the operand number corresponding to the use). A range-based for
851 // will unbox the use and provide an SDNode*.
852 for (SDNode::use_iterator Use
= N
->use_begin(), UseEnd
= N
->use_end();
853 Use
!= UseEnd
; ++Use
) {
855 Use
->isMachineOpcode() ? Use
->getMachineOpcode() : Use
->getOpcode();
859 if (Use
->isMachineOpcode())
862 std::max(MaxTruncation
, (unsigned)Use
->getValueType(0).getSizeInBits());
865 if (Use
->isMachineOpcode())
867 StoreSDNode
*STN
= cast
<StoreSDNode
>(*Use
);
868 unsigned MemVTSize
= STN
->getMemoryVT().getSizeInBits();
// A 64-bit store, or a use of N as anything other than operand 0, is singled
// out by this check.
869 if (MemVTSize
== 64 || Use
.getOperandNo() != 0)
871 MaxTruncation
= std::max(MaxTruncation
, MemVTSize
);
878 if (Use
.getOperandNo() != 0)
880 MaxTruncation
= std::max(MaxTruncation
, 32u);
886 if (Use
.getOperandNo() != 0)
888 MaxTruncation
= std::max(MaxTruncation
, 16u);
894 if (Use
.getOperandNo() != 0)
896 MaxTruncation
= std::max(MaxTruncation
, 8u);
900 return MaxTruncation
;
903 // For any 32 < Num < 64, check if the Imm contains at least Num consecutive
904 // zeros and return the number of bits by the left of these consecutive zeros.
// Only a run of zeros that straddles the boundary between the high and the low
// 32-bit word is measured: its length is the number of trailing zeros of the
// high word (HiTZ) plus the number of leading zeros of the low word (LoLZ).
// Callers in this file use a non-zero result as a rotate amount and treat 0 as
// "no such run".
// NOTE(review): the return statements are not visible in this excerpt.
905 static int findContiguousZerosAtLeast(uint64_t Imm
, unsigned Num
) {
906 unsigned HiTZ
= countTrailingZeros
<uint32_t>(Hi_32(Imm
));
907 unsigned LoLZ
= countLeadingZeros
<uint32_t>(Lo_32(Imm
));
908 if ((HiTZ
+ LoLZ
) >= Num
)
913 // Direct materialization of 64-bit constants by enumerated patterns.
// Imm is tested against the bit patterns listed below, in order, and the final
// machine node of the first matching pattern is returned. The patterns are
// grouped by the number of instructions they need (1, then 2, then 3).
// TZ/LZ/TO/LO are the trailing/leading zero/one counts of Imm, and FO is the
// number of ones that directly follow the leading zeros.
// NOTE(review): no write to InstCnt and no fall-through return are visible in
// this excerpt; presumably InstCnt is set per group and nullptr is returned
// when no pattern matches -- confirm against the full source.
914 static SDNode
*selectI64ImmDirect(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
915 uint64_t Imm
, unsigned &InstCnt
) {
916 unsigned TZ
= countTrailingZeros
<uint64_t>(Imm
);
917 unsigned LZ
= countLeadingZeros
<uint64_t>(Imm
);
918 unsigned TO
= countTrailingOnes
<uint64_t>(Imm
);
919 unsigned LO
= countLeadingOnes
<uint64_t>(Imm
);
920 unsigned Hi32
= Hi_32(Imm
);
921 unsigned Lo32
= Lo_32(Imm
);
922 SDNode
*Result
= nullptr;
925 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
926 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
929 // Following patterns use 1 instruction to materialize the Imm.
931 // 1-1) Patterns : {zeros}{15-bit value}
932 // {ones}{15-bit value}
933 if (isInt
<16>(Imm
)) {
934 SDValue SDImm
= CurDAG
->getTargetConstant(Imm
, dl
, MVT::i64
);
935 return CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, SDImm
);
937 // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
938 // {ones}{15-bit value}{16 zeros}
939 if (TZ
> 15 && (LZ
> 32 || LO
> 32))
940 return CurDAG
->getMachineNode(PPC::LIS8
, dl
, MVT::i64
,
941 getI32Imm((Imm
>> 16) & 0xffff));
943 // Following patterns use 2 instructions to materialize the Imm.
945 assert(LZ
< 64 && "Unexpected leading zeros here.");
946 // Count of ones following the leading zeros.
947 unsigned FO
= countLeadingOnes
<uint64_t>(Imm
<< LZ
);
948 // 2-1) Patterns : {zeros}{31-bit value}
949 // {ones}{31-bit value}
950 if (isInt
<32>(Imm
)) {
951 uint64_t ImmHi16
= (Imm
>> 16) & 0xffff;
952 unsigned Opcode
= ImmHi16
? PPC::LIS8
: PPC::LI8
;
953 Result
= CurDAG
->getMachineNode(Opcode
, dl
, MVT::i64
, getI32Imm(ImmHi16
));
954 return CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
, SDValue(Result
, 0),
955 getI32Imm(Imm
& 0xffff));
957 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
958 // {zeros}{15-bit value}{zeros}
959 // {zeros}{ones}{15-bit value}
960 // {ones}{15-bit value}{zeros}
961 // We can take advantage of LI's sign-extension semantics to generate leading
962 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
963 if ((LZ
+ FO
+ TZ
) > 48) {
964 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
,
965 getI32Imm((Imm
>> TZ
) & 0xffff));
966 return CurDAG
->getMachineNode(PPC::RLDIC
, dl
, MVT::i64
, SDValue(Result
, 0),
967 getI32Imm(TZ
), getI32Imm(LZ
));
969 // 2-3) Pattern : {zeros}{15-bit value}{ones}
970 // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value,
971 // therefore we can take advantage of LI's sign-extension semantics, and then
972 // mask them off after rotation.
974 // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
975 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
976 // +------------------------+ +------------------------+
978 // Imm (Imm >> (48 - LZ) & 0xffff)
979 // +----sext-----|--16-bit--+ +clear-|-----------------+
980 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
981 // +------------------------+ +------------------------+
983 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
984 if ((LZ
+ TO
) > 48) {
985 // Since the immediates with (LZ > 32) have been handled by previous
986 // patterns, here we have (LZ <= 32) to make sure we will not shift right
987 // the Imm by a negative value.
988 assert(LZ
<= 32 && "Unexpected shift value.");
989 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
,
990 getI32Imm((Imm
>> (48 - LZ
) & 0xffff)));
991 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, SDValue(Result
, 0),
992 getI32Imm(48 - LZ
), getI32Imm(LZ
));
994 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
995 // {ones}{15-bit value}{ones}
996 // We can take advantage of LI's sign-extension semantics to generate leading
997 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1000 // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1001 // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1002 // +------------------------+ +------------------------+
1004 // Imm (Imm >> TO) & 0xffff
1005 // +----sext-----|--16-bit--+ +LZ|---------------------+
1006 // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1007 // +------------------------+ +------------------------+
1009 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1010 if ((LZ
+ FO
+ TO
) > 48) {
1011 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
,
1012 getI32Imm((Imm
>> TO
) & 0xffff));
1013 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, SDValue(Result
, 0),
1014 getI32Imm(TO
), getI32Imm(LZ
));
1016 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1017 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1018 // value, we can use LI for Lo16 without generating leading ones then add the
1020 if (LZ
== 32 && ((Lo32
& 0x8000) == 0)) {
1021 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
,
1022 getI32Imm(Lo32
& 0xffff));
1023 return CurDAG
->getMachineNode(PPC::ORIS8
, dl
, MVT::i64
, SDValue(Result
, 0),
1024 getI32Imm(Lo32
>> 16));
1026 // 2-6) Patterns : {******}{49 zeros}{******}
1027 // {******}{49 ones}{******}
1028 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1029 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1030 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1033 // 1) findContiguousZerosAtLeast(Imm, 49)
1034 // +------|--zeros-|------+ +--zeros--||---15 bit--+
1035 // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1036 // +----------------------+ +----------------------+
1039 // 2) findContiguousZerosAtLeast(~Imm, 49)
1040 // +------|--ones--|------+ +---ones--||---15 bit--+
1041 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1042 // +----------------------+ +----------------------+
1044 if ((Shift
= findContiguousZerosAtLeast(Imm
, 49)) ||
1045 (Shift
= findContiguousZerosAtLeast(~Imm
, 49))) {
1046 uint64_t RotImm
= APInt(64, Imm
).rotr(Shift
).getZExtValue();
1047 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
,
1048 getI32Imm(RotImm
& 0xffff));
1049 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, SDValue(Result
, 0),
1050 getI32Imm(Shift
), getI32Imm(0));
1053 // Following patterns use 3 instructions to materialize the Imm.
1055 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1056 // {zeros}{31-bit value}{zeros}
1057 // {zeros}{ones}{31-bit value}
1058 // {ones}{31-bit value}{zeros}
1059 // We can take advantage of LIS's sign-extension semantics to generate leading
1060 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1061 // ones in both sides after rotation.
1062 if ((LZ
+ FO
+ TZ
) > 32) {
1063 uint64_t ImmHi16
= (Imm
>> (TZ
+ 16)) & 0xffff;
1064 unsigned Opcode
= ImmHi16
? PPC::LIS8
: PPC::LI8
;
1065 Result
= CurDAG
->getMachineNode(Opcode
, dl
, MVT::i64
, getI32Imm(ImmHi16
));
1066 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
, SDValue(Result
, 0),
1067 getI32Imm((Imm
>> TZ
) & 0xffff));
1068 return CurDAG
->getMachineNode(PPC::RLDIC
, dl
, MVT::i64
, SDValue(Result
, 0),
1069 getI32Imm(TZ
), getI32Imm(LZ
));
1071 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1072 // Shift right the Imm by (32 - LZ) bits to construct a negative 32-bit value,
1073 // therefore we can take advantage of LIS's sign-extension semantics, add
1074 // the remaining bits with ORI, and then mask them off after rotation.
1075 // This is similar to Pattern 2-3, please refer to the diagram there.
1076 if ((LZ
+ TO
) > 32) {
1077 // Since the immediates with (LZ > 32) have been handled by previous
1078 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1079 // the Imm by a negative value.
1080 assert(LZ
<= 32 && "Unexpected shift value.");
1081 Result
= CurDAG
->getMachineNode(PPC::LIS8
, dl
, MVT::i64
,
1082 getI32Imm((Imm
>> (48 - LZ
)) & 0xffff));
1083 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
, SDValue(Result
, 0),
1084 getI32Imm((Imm
>> (32 - LZ
)) & 0xffff));
1085 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, SDValue(Result
, 0),
1086 getI32Imm(32 - LZ
), getI32Imm(LZ
));
1088 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1089 // {ones}{31-bit value}{ones}
1090 // We can take advantage of LIS's sign-extension semantics to generate leading
1091 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1092 // ones in left sides (if required) after rotation.
1093 // This is similar to Pattern 2-4, please refer to the diagram there.
1094 if ((LZ
+ FO
+ TO
) > 32) {
1095 Result
= CurDAG
->getMachineNode(PPC::LIS8
, dl
, MVT::i64
,
1096 getI32Imm((Imm
>> (TO
+ 16)) & 0xffff));
1097 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
, SDValue(Result
, 0),
1098 getI32Imm((Imm
>> TO
) & 0xffff));
1099 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, SDValue(Result
, 0),
1100 getI32Imm(TO
), getI32Imm(LZ
));
1102 // 3-4) Patterns : High word == Low word
1104 // Handle the first 32 bits.
1105 uint64_t ImmHi16
= (Lo32
>> 16) & 0xffff;
1106 unsigned Opcode
= ImmHi16
? PPC::LIS8
: PPC::LI8
;
1107 Result
= CurDAG
->getMachineNode(Opcode
, dl
, MVT::i64
, getI32Imm(ImmHi16
));
1108 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
, SDValue(Result
, 0),
1109 getI32Imm(Lo32
& 0xffff));
1110 // Use rldimi to insert the Low word into High word.
1111 SDValue Ops
[] = {SDValue(Result
, 0), SDValue(Result
, 0), getI32Imm(32),
1113 return CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
);
1115 // 3-5) Patterns : {******}{33 zeros}{******}
1116 // {******}{33 ones}{******}
1117 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1118 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1119 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1121 // This is similar to Pattern 2-6, please refer to the diagram there.
1122 if ((Shift
= findContiguousZerosAtLeast(Imm
, 33)) ||
1123 (Shift
= findContiguousZerosAtLeast(~Imm
, 33))) {
1124 uint64_t RotImm
= APInt(64, Imm
).rotr(Shift
).getZExtValue();
1125 uint64_t ImmHi16
= (RotImm
>> 16) & 0xffff;
1126 unsigned Opcode
= ImmHi16
? PPC::LIS8
: PPC::LI8
;
1127 Result
= CurDAG
->getMachineNode(Opcode
, dl
, MVT::i64
, getI32Imm(ImmHi16
));
1128 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
, SDValue(Result
, 0),
1129 getI32Imm(RotImm
& 0xffff));
1130 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, SDValue(Result
, 0),
1131 getI32Imm(Shift
), getI32Imm(0));
1138 // Try to select instructions to generate a 64 bit immediate using prefix as
1139 // well as non prefix instructions. The function will return the SDNode
1140 // to materialize that constant or it will return nullptr if it does not
1141 // find one. The variable InstCnt is set to the number of instructions that
// NOTE(review): the end of the sentence above, the guard conditions of several
// patterns, and every write to InstCnt are missing from this excerpt; confirm
// against the full source.
// The visible strategy is: a single PLI8 when possible, then PLI8 followed by
// one rotate-and-mask instruction (RLDIC or RLDICL), then a splat of a 32-bit
// word with RLDIMI, and finally PLI8 of each 32-bit half combined with RLDIMI.
1143 static SDNode
*selectI64ImmDirectPrefix(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
1144 uint64_t Imm
, unsigned &InstCnt
) {
1145 unsigned TZ
= countTrailingZeros
<uint64_t>(Imm
);
1146 unsigned LZ
= countLeadingZeros
<uint64_t>(Imm
);
1147 unsigned TO
= countTrailingOnes
<uint64_t>(Imm
);
// FO is the number of ones directly after the leading zeros. When LZ is 64 the
// shift is skipped and 0 is passed instead.
1148 unsigned FO
= countLeadingOnes
<uint64_t>(LZ
== 64 ? 0 : (Imm
<< LZ
));
1149 unsigned Hi32
= Hi_32(Imm
);
1150 unsigned Lo32
= Lo_32(Imm
);
1152 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
1153 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
1156 auto getI64Imm
= [CurDAG
, dl
](uint64_t Imm
) {
1157 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i64
);
1160 // Following patterns use 1 instruction to materialize Imm.
1163 // The pli instruction can materialize up to 34 bits directly.
1164 // If a constant fits within 34-bits, emit the pli instruction here directly.
1166 return CurDAG
->getMachineNode(PPC::PLI8
, dl
, MVT::i64
,
1167 CurDAG
->getTargetConstant(Imm
, dl
, MVT::i64
));
1169 // Require at least two instructions.
1171 SDNode
*Result
= nullptr;
1172 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1173 // {zeros}{33-bit value}{zeros}
1174 // {zeros}{ones}{33-bit value}
1175 // {ones}{33-bit value}{zeros}
1176 // We can take advantage of PLI's sign-extension semantics to generate leading
1177 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1178 if ((LZ
+ FO
+ TZ
) > 30) {
// Take the 34 bits that sit above the trailing zeros and sign-extend them to
// 64 bits to form the PLI8 operand.
1179 APInt SignedInt34
= APInt(34, (Imm
>> TZ
) & 0x3ffffffff);
1180 APInt Extended
= SignedInt34
.sext(64);
1181 Result
= CurDAG
->getMachineNode(PPC::PLI8
, dl
, MVT::i64
,
1182 getI64Imm(*Extended
.getRawData()));
1183 return CurDAG
->getMachineNode(PPC::RLDIC
, dl
, MVT::i64
, SDValue(Result
, 0),
1184 getI32Imm(TZ
), getI32Imm(LZ
));
1186 // Pattern : {zeros}{33-bit value}{ones}
1187 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1188 // therefore we can take advantage of PLI's sign-extension semantics, and then
1189 // mask them off after rotation.
1191 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1192 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1193 // +------------------------+ +------------------------+
1196 // +----sext-----|--34-bit--+ +clear-|-----------------+
1197 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1198 // +------------------------+ +------------------------+
1200 if ((LZ
+ TO
) > 30) {
1201 APInt SignedInt34
= APInt(34, (Imm
>> (30 - LZ
)) & 0x3ffffffff);
1202 APInt Extended
= SignedInt34
.sext(64);
1203 Result
= CurDAG
->getMachineNode(PPC::PLI8
, dl
, MVT::i64
,
1204 getI64Imm(*Extended
.getRawData()));
1205 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, SDValue(Result
, 0),
1206 getI32Imm(30 - LZ
), getI32Imm(LZ
));
1208 // Patterns : {zeros}{ones}{33-bit value}{ones}
1209 // {ones}{33-bit value}{ones}
1210 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1211 // generate leading ones, and then use RLDICL to mask off the ones in left
1212 // sides (if required) after rotation.
1213 if ((LZ
+ FO
+ TO
) > 30) {
1214 APInt SignedInt34
= APInt(34, (Imm
>> TO
) & 0x3ffffffff);
1215 APInt Extended
= SignedInt34
.sext(64);
1216 Result
= CurDAG
->getMachineNode(PPC::PLI8
, dl
, MVT::i64
,
1217 getI64Imm(*Extended
.getRawData()));
1218 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, SDValue(Result
, 0),
1219 getI32Imm(TO
), getI32Imm(LZ
));
1221 // Patterns : {******}{31 zeros}{******}
1222 // : {******}{31 ones}{******}
1223 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1224 // is 33. Rotate right the Imm to construct an int<33> value, we can use PLI
1225 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1227 // +------|--ones--|------+ +---ones--||---33 bit--+
1228 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1229 // +----------------------+ +----------------------+
// Try each right-rotation amount in turn and stop at the first one whose
// rotated value fits in a signed 34-bit immediate.
1231 for (unsigned Shift
= 0; Shift
< 63; ++Shift
) {
1232 uint64_t RotImm
= APInt(64, Imm
).rotr(Shift
).getZExtValue();
1233 if (isInt
<34>(RotImm
)) {
1235 CurDAG
->getMachineNode(PPC::PLI8
, dl
, MVT::i64
, getI64Imm(RotImm
));
1236 return CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
1237 SDValue(Result
, 0), getI32Imm(Shift
),
1242 // Patterns : High word == Low word
1243 // This is basically a splat of a 32 bit immediate.
1245 Result
= CurDAG
->getMachineNode(PPC::PLI8
, dl
, MVT::i64
, getI64Imm(Hi32
));
1246 SDValue Ops
[] = {SDValue(Result
, 0), SDValue(Result
, 0), getI32Imm(32),
1248 return CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
);
1253 // This pattern can form any 64 bit immediate in 3 instructions.
1255 CurDAG
->getMachineNode(PPC::PLI8
, dl
, MVT::i64
, getI64Imm(Hi32
));
1257 CurDAG
->getMachineNode(PPC::PLI8
, dl
, MVT::i64
, getI64Imm(Lo32
));
1258 SDValue Ops
[] = {SDValue(ResultLo
, 0), SDValue(ResultHi
, 0), getI32Imm(32),
1260 return CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
);
// Materializes the 64-bit constant Imm. InstCnt is optional (it defaults to
// nullptr); the visible code stores the chosen instruction count through it.
// Strategy, as far as this excerpt shows:
//   1. Try selectI64ImmDirect.
//   2. If the subtarget has prefixed instructions and the direct attempt did
//      not report exactly one instruction, also try selectI64ImmDirectPrefix
//      and prefer it when the direct attempt produced nothing or needed more
//      instructions.
//   3. Otherwise materialize only the upper 32 bits with selectI64ImmDirect
//      and OR in the two low 16-bit halves with ORIS8 / ORI8 when they are
//      non-zero.
// NOTE(review): the return statements and the null checks around the InstCnt
// stores are not visible in this excerpt -- confirm against the full source.
1263 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, const SDLoc
&dl
, uint64_t Imm
,
1264 unsigned *InstCnt
= nullptr) {
1265 unsigned InstCntDirect
= 0;
1266 // No more than 3 instructions are used if we can select the i64 immediate
1268 SDNode
*Result
= selectI64ImmDirect(CurDAG
, dl
, Imm
, InstCntDirect
);
1270 const PPCSubtarget
&Subtarget
=
1271 CurDAG
->getMachineFunction().getSubtarget
<PPCSubtarget
>();
1273 // If we have prefixed instructions and there is a chance we can
1274 // materialize the constant with fewer prefixed instructions than
1275 // non-prefixed, try that.
1276 if (Subtarget
.hasPrefixInstrs() && InstCntDirect
!= 1) {
1277 unsigned InstCntDirectP
= 0;
1278 SDNode
*ResultP
= selectI64ImmDirectPrefix(CurDAG
, dl
, Imm
, InstCntDirectP
);
1279 // Use the prefix case in either of two cases:
1280 // 1) We have no result from the non-prefix case to use.
1281 // 2) The non-prefix case uses more instructions than the prefix case.
1282 // If the prefix and non-prefix cases use the same number of instructions
1283 // we will prefer the non-prefix case.
1284 if (ResultP
&& (!Result
|| InstCntDirectP
< InstCntDirect
)) {
1286 *InstCnt
= InstCntDirectP
;
1293 *InstCnt
= InstCntDirect
;
1296 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
1297 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
1299 // Handle the upper 32 bit value.
1301 selectI64ImmDirect(CurDAG
, dl
, Imm
& 0xffffffff00000000, InstCntDirect
);
1302 // Add in the last bits as required.
1303 if (uint32_t Hi16
= (Lo_32(Imm
) >> 16) & 0xffff) {
1304 Result
= CurDAG
->getMachineNode(PPC::ORIS8
, dl
, MVT::i64
,
1305 SDValue(Result
, 0), getI32Imm(Hi16
));
1308 if (uint32_t Lo16
= Lo_32(Imm
) & 0xffff) {
1309 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
, SDValue(Result
, 0),
1314 *InstCnt
= InstCntDirect
;
1318 // Select a 64-bit constant.
// Reads the constant held by N. If allUsesTruncate reports a non-zero width
// (MinSize), the value is sign-extended from that width and, when the result
// fits in a signed 16-bit immediate, a single LI8 is emitted. The visible
// fallback hands the original (not sign-extended) Imm to the three-argument
// selectI64Imm overload.
// NOTE(review): the declaration of dl and the closing braces are not visible
// in this excerpt.
1319 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, SDNode
*N
) {
1322 // Get 64 bit value.
1323 int64_t Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
1324 if (unsigned MinSize
= allUsesTruncate(CurDAG
, N
)) {
1325 uint64_t SextImm
= SignExtend64(Imm
, MinSize
);
1326 SDValue SDImm
= CurDAG
->getTargetConstant(SextImm
, dl
, MVT::i64
);
1327 if (isInt
<16>(SextImm
))
1328 return CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, SDImm
);
1330 return selectI64Imm(CurDAG
, dl
, Imm
);
1335 class BitPermutationSelector
{
1339 // The bit number in the value, using a convention where bit 0 is the
1340 // lowest-order bit.
1343 // ConstZero means a bit we need to mask off.
1344 // Variable is a bit that comes from an input variable.
1345 // VariableKnownToBeZero is also a bit that comes from an input variable,
1346 // but it is already known to be zero, so we do not need to mask it.
1350 VariableKnownToBeZero
1353 ValueBit(SDValue V
, unsigned I
, Kind K
= Variable
)
1354 : V(V
), Idx(I
), K(K
) {}
1355 ValueBit(Kind K
= Variable
)
1356 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX
), K(K
) {}
1358 bool isZero() const {
1359 return K
== ConstZero
|| K
== VariableKnownToBeZero
;
1362 bool hasValue() const {
1363 return K
== Variable
|| K
== VariableKnownToBeZero
;
1366 SDValue
getValue() const {
1367 assert(hasValue() && "Cannot get the value of a constant bit");
1371 unsigned getValueBitIndex() const {
1372 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1377 // A bit group has the same underlying value and the same rotate factor.
1381 unsigned StartIdx
, EndIdx
;
1383 // This rotation amount assumes that the lower 32 bits of the quantity are
1384 // replicated in the high 32 bits by the rotation operator (which is done
1385 // by rlwinm and friends in 64-bit mode).
1387 // Did converting to Repl32 == true change the rotation factor? If it did,
1388 // it decreased it by 32.
1390 // Was this group coalesced after setting Repl32 to true?
1391 bool Repl32Coalesced
;
1393 BitGroup(SDValue V
, unsigned R
, unsigned S
, unsigned E
)
1394 : V(V
), RLAmt(R
), StartIdx(S
), EndIdx(E
), Repl32(false), Repl32CR(false),
1395 Repl32Coalesced(false) {
1396 LLVM_DEBUG(dbgs() << "\tbit group for " << V
.getNode() << " RLAmt = " << R
1397 << " [" << S
<< ", " << E
<< "]\n");
1401 // Information on each (Value, RLAmt) pair (like the number of groups
1402 // associated with each) used to choose the lowering method.
1403 struct ValueRotInfo
{
1405 unsigned RLAmt
= std::numeric_limits
<unsigned>::max();
1406 unsigned NumGroups
= 0;
1407 unsigned FirstGroupStartIdx
= std::numeric_limits
<unsigned>::max();
1408 bool Repl32
= false;
1410 ValueRotInfo() = default;
1412 // For sorting (in reverse order) by NumGroups, and then by
1413 // FirstGroupStartIdx.
1414 bool operator < (const ValueRotInfo
&Other
) const {
1415 // We need to sort so that the non-Repl32 come first because, when we're
1416 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1417 // masking operation.
1418 if (Repl32
< Other
.Repl32
)
1420 else if (Repl32
> Other
.Repl32
)
1422 else if (NumGroups
> Other
.NumGroups
)
1424 else if (NumGroups
< Other
.NumGroups
)
1426 else if (RLAmt
== 0 && Other
.RLAmt
!= 0)
1428 else if (RLAmt
!= 0 && Other
.RLAmt
== 0)
1430 else if (FirstGroupStartIdx
< Other
.FirstGroupStartIdx
)
1436 using ValueBitsMemoizedValue
= std::pair
<bool, SmallVector
<ValueBit
, 64>>;
1437 using ValueBitsMemoizer
=
1438 DenseMap
<SDValue
, std::unique_ptr
<ValueBitsMemoizedValue
>>;
1439 ValueBitsMemoizer Memoizer
;
1441 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1442 // The bool is true if something interesting was deduced and false if we're
1443 // providing only a generic representation of V (or something else likewise
1444 // uninteresting for instruction selection) through the SmallVector.
1445 std::pair
<bool, SmallVector
<ValueBit
, 64> *> getValueBits(SDValue V
,
1447 auto &ValueEntry
= Memoizer
[V
];
1449 return std::make_pair(ValueEntry
->first
, &ValueEntry
->second
);
1450 ValueEntry
.reset(new ValueBitsMemoizedValue());
1451 bool &Interesting
= ValueEntry
->first
;
1452 SmallVector
<ValueBit
, 64> &Bits
= ValueEntry
->second
;
1453 Bits
.resize(NumBits
);
1455 switch (V
.getOpcode()) {
1458 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1459 unsigned RotAmt
= V
.getConstantOperandVal(1);
1461 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1463 for (unsigned i
= 0; i
< NumBits
; ++i
)
1464 Bits
[i
] = LHSBits
[i
< RotAmt
? i
+ (NumBits
- RotAmt
) : i
- RotAmt
];
1466 return std::make_pair(Interesting
= true, &Bits
);
1471 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1472 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1474 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1476 for (unsigned i
= ShiftAmt
; i
< NumBits
; ++i
)
1477 Bits
[i
] = LHSBits
[i
- ShiftAmt
];
1479 for (unsigned i
= 0; i
< ShiftAmt
; ++i
)
1480 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1482 return std::make_pair(Interesting
= true, &Bits
);
1487 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1488 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1490 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1492 for (unsigned i
= 0; i
< NumBits
- ShiftAmt
; ++i
)
1493 Bits
[i
] = LHSBits
[i
+ ShiftAmt
];
1495 for (unsigned i
= NumBits
- ShiftAmt
; i
< NumBits
; ++i
)
1496 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1498 return std::make_pair(Interesting
= true, &Bits
);
1502 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1503 uint64_t Mask
= V
.getConstantOperandVal(1);
1505 const SmallVector
<ValueBit
, 64> *LHSBits
;
1506 // Mark this as interesting, only if the LHS was also interesting. This
1507 // prevents the overall procedure from matching a single immediate 'and'
1508 // (which is non-optimal because such an and might be folded with other
1509 // things if we don't select it here).
1510 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0), NumBits
);
1512 for (unsigned i
= 0; i
< NumBits
; ++i
)
1513 if (((Mask
>> i
) & 1) == 1)
1514 Bits
[i
] = (*LHSBits
)[i
];
1516 // AND instruction masks this bit. If the input is already zero,
1517 // we have nothing to do here. Otherwise, make the bit ConstZero.
1518 if ((*LHSBits
)[i
].isZero())
1519 Bits
[i
] = (*LHSBits
)[i
];
1521 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1524 return std::make_pair(Interesting
, &Bits
);
1528 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1529 const auto &RHSBits
= *getValueBits(V
.getOperand(1), NumBits
).second
;
1531 bool AllDisjoint
= true;
1532 SDValue LastVal
= SDValue();
1533 unsigned LastIdx
= 0;
1534 for (unsigned i
= 0; i
< NumBits
; ++i
) {
1535 if (LHSBits
[i
].isZero() && RHSBits
[i
].isZero()) {
1536 // If both inputs are known to be zero and one is ConstZero and
1537 // the other is VariableKnownToBeZero, we can select whichever
1538 // we like. To minimize the number of bit groups, we select
1539 // VariableKnownToBeZero if this bit is the next bit of the same
1540 // input variable from the previous bit. Otherwise, we select
1542 if (LHSBits
[i
].hasValue() && LHSBits
[i
].getValue() == LastVal
&&
1543 LHSBits
[i
].getValueBitIndex() == LastIdx
+ 1)
1544 Bits
[i
] = LHSBits
[i
];
1545 else if (RHSBits
[i
].hasValue() && RHSBits
[i
].getValue() == LastVal
&&
1546 RHSBits
[i
].getValueBitIndex() == LastIdx
+ 1)
1547 Bits
[i
] = RHSBits
[i
];
1549 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1551 else if (LHSBits
[i
].isZero())
1552 Bits
[i
] = RHSBits
[i
];
1553 else if (RHSBits
[i
].isZero())
1554 Bits
[i
] = LHSBits
[i
];
1556 AllDisjoint
= false;
1559 // We remember the value and bit index of this bit.
1560 if (Bits
[i
].hasValue()) {
1561 LastVal
= Bits
[i
].getValue();
1562 LastIdx
= Bits
[i
].getValueBitIndex();
1565 if (LastVal
) LastVal
= SDValue();
1573 return std::make_pair(Interesting
= true, &Bits
);
1575 case ISD::ZERO_EXTEND
: {
1576 // We support only the case with zero extension from i32 to i64 so far.
1577 if (V
.getValueType() != MVT::i64
||
1578 V
.getOperand(0).getValueType() != MVT::i32
)
1581 const SmallVector
<ValueBit
, 64> *LHSBits
;
1582 const unsigned NumOperandBits
= 32;
1583 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0),
1586 for (unsigned i
= 0; i
< NumOperandBits
; ++i
)
1587 Bits
[i
] = (*LHSBits
)[i
];
1589 for (unsigned i
= NumOperandBits
; i
< NumBits
; ++i
)
1590 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1592 return std::make_pair(Interesting
, &Bits
);
1594 case ISD::TRUNCATE
: {
1595 EVT FromType
= V
.getOperand(0).getValueType();
1596 EVT ToType
= V
.getValueType();
1597 // We support only the case with truncate from i64 to i32.
1598 if (FromType
!= MVT::i64
|| ToType
!= MVT::i32
)
1600 const unsigned NumAllBits
= FromType
.getSizeInBits();
1601 SmallVector
<ValueBit
, 64> *InBits
;
1602 std::tie(Interesting
, InBits
) = getValueBits(V
.getOperand(0),
1604 const unsigned NumValidBits
= ToType
.getSizeInBits();
1606 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1607 // So, we cannot include this truncate.
1608 bool UseUpper32bit
= false;
1609 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1610 if ((*InBits
)[i
].hasValue() && (*InBits
)[i
].getValueBitIndex() >= 32) {
1611 UseUpper32bit
= true;
1617 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1618 Bits
[i
] = (*InBits
)[i
];
1620 return std::make_pair(Interesting
, &Bits
);
1622 case ISD::AssertZext
: {
1623 // For AssertZext, we look through the operand and
1624 // mark the bits known to be zero.
1625 const SmallVector
<ValueBit
, 64> *LHSBits
;
1626 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0),
1629 EVT FromType
= cast
<VTSDNode
>(V
.getOperand(1))->getVT();
1630 const unsigned NumValidBits
= FromType
.getSizeInBits();
1631 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1632 Bits
[i
] = (*LHSBits
)[i
];
1634 // These bits are known to be zero but the AssertZext may be from a value
1635 // that already has some constant zero bits (i.e. from a masking and).
1636 for (unsigned i
= NumValidBits
; i
< NumBits
; ++i
)
1637 Bits
[i
] = (*LHSBits
)[i
].hasValue()
1638 ? ValueBit((*LHSBits
)[i
].getValue(),
1639 (*LHSBits
)[i
].getValueBitIndex(),
1640 ValueBit::VariableKnownToBeZero
)
1641 : ValueBit(ValueBit::ConstZero
);
1643 return std::make_pair(Interesting
, &Bits
);
1646 LoadSDNode
*LD
= cast
<LoadSDNode
>(V
);
1647 if (ISD::isZEXTLoad(V
.getNode()) && V
.getResNo() == 0) {
1648 EVT VT
= LD
->getMemoryVT();
1649 const unsigned NumValidBits
= VT
.getSizeInBits();
1651 for (unsigned i
= 0; i
< NumValidBits
; ++i
)
1652 Bits
[i
] = ValueBit(V
, i
);
1654 // These bits are known to be zero.
1655 for (unsigned i
= NumValidBits
; i
< NumBits
; ++i
)
1656 Bits
[i
] = ValueBit(V
, i
, ValueBit::VariableKnownToBeZero
);
1658 // Zero-extending load itself cannot be optimized. So, it is not
1659 // interesting by itself though it gives useful information.
1660 return std::make_pair(Interesting
= false, &Bits
);
1665 for (unsigned i
= 0; i
< NumBits
; ++i
)
1666 Bits
[i
] = ValueBit(V
, i
);
1668 return std::make_pair(Interesting
= false, &Bits
);
1671 // For each value (except the constant ones), compute the left-rotate amount
1672 // to get it from its original to final position.
1673 void computeRotationAmounts() {
1675 RLAmt
.resize(Bits
.size());
1676 for (unsigned i
= 0; i
< Bits
.size(); ++i
)
1677 if (Bits
[i
].hasValue()) {
1678 unsigned VBI
= Bits
[i
].getValueBitIndex();
1682 RLAmt
[i
] = Bits
.size() - (VBI
- i
);
1683 } else if (Bits
[i
].isZero()) {
1685 RLAmt
[i
] = UINT32_MAX
;
1687 llvm_unreachable("Unknown value bit type");
1691 // Collect groups of consecutive bits with the same underlying value and
1692 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1693 // they break up groups.
1694 void collectBitGroups(bool LateMask
) {
1697 unsigned LastRLAmt
= RLAmt
[0];
1698 SDValue LastValue
= Bits
[0].hasValue() ? Bits
[0].getValue() : SDValue();
1699 unsigned LastGroupStartIdx
= 0;
1700 bool IsGroupOfZeros
= !Bits
[LastGroupStartIdx
].hasValue();
1701 for (unsigned i
= 1; i
< Bits
.size(); ++i
) {
1702 unsigned ThisRLAmt
= RLAmt
[i
];
1703 SDValue ThisValue
= Bits
[i
].hasValue() ? Bits
[i
].getValue() : SDValue();
1704 if (LateMask
&& !ThisValue
) {
1705 ThisValue
= LastValue
;
1706 ThisRLAmt
= LastRLAmt
;
1707 // If we're doing late masking, then the first bit group always starts
1708 // at zero (even if the first bits were zero).
1709 if (BitGroups
.empty())
1710 LastGroupStartIdx
= 0;
1713 // If this bit is known to be zero and the current group is a bit group
1714 // of zeros, we do not need to terminate the current bit group even if the
1715 // Value or RLAmt does not match here. Instead, we terminate this group
1716 // when the first non-zero bit appears later.
1717 if (IsGroupOfZeros
&& Bits
[i
].isZero())
1720 // If this bit has the same underlying value and the same rotate factor as
1721 // the last one, then they're part of the same group.
1722 if (ThisRLAmt
== LastRLAmt
&& ThisValue
== LastValue
)
1723 // We cannot continue the current group if this bit is not known to
1724 // be zero in a bit group of zeros.
1725 if (!(IsGroupOfZeros
&& ThisValue
&& !Bits
[i
].isZero()))
1728 if (LastValue
.getNode())
1729 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1731 LastRLAmt
= ThisRLAmt
;
1732 LastValue
= ThisValue
;
1733 LastGroupStartIdx
= i
;
1734 IsGroupOfZeros
= !Bits
[LastGroupStartIdx
].hasValue();
1736 if (LastValue
.getNode())
1737 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1740 if (BitGroups
.empty())
1743 // We might be able to combine the first and last groups.
1744 if (BitGroups
.size() > 1) {
1745 // If the first and last groups are the same, then remove the first group
1746 // in favor of the last group, making the ending index of the last group
1747 // equal to the ending index of the to-be-removed first group.
1748 if (BitGroups
[0].StartIdx
== 0 &&
1749 BitGroups
[BitGroups
.size()-1].EndIdx
== Bits
.size()-1 &&
1750 BitGroups
[0].V
== BitGroups
[BitGroups
.size()-1].V
&&
1751 BitGroups
[0].RLAmt
== BitGroups
[BitGroups
.size()-1].RLAmt
) {
1752 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1753 BitGroups
[BitGroups
.size()-1].EndIdx
= BitGroups
[0].EndIdx
;
1754 BitGroups
.erase(BitGroups
.begin());
1759 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1760 // associated with each. If the number of groups is the same, we prefer a group
1761 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1762 // instruction. If there is a degeneracy, pick the one that occurs
1763 // first (in the final value).
//
// Fills ValueRots (keyed by value and rotation) from BitGroups, then copies
// the map's entries into ValueRotsVec and sorts that vector. The ordering
// described above comes from ValueRotInfo's comparison, which is defined
// outside this function.
1764 void collectValueRotInfo() {
1767 for (auto &BG
: BitGroups
) {
// Repl32 groups get a key offset by 64 so that they never share a map entry
// with a non-Repl32 group that has the same value and rotation amount.
1768 unsigned RLAmtKey
= BG
.RLAmt
+ (BG
.Repl32
? 64 : 0);
1769 ValueRotInfo
&VRI
= ValueRots
[std::make_pair(BG
.V
, RLAmtKey
)];
1771 VRI
.RLAmt
= BG
.RLAmt
;
1772 VRI
.Repl32
= BG
.Repl32
;
// Remember the lowest start index among the groups sharing this key.
1774 VRI
.FirstGroupStartIdx
= std::min(VRI
.FirstGroupStartIdx
, BG
.StartIdx
);
1777 // Now that we've collected the various ValueRotInfo instances, we need to
// sort them.
1779 ValueRotsVec
.clear();
1780 for (auto &I
: ValueRots
) {
1781 ValueRotsVec
.push_back(I
.second
);
1783 llvm::sort(ValueRotsVec
);
1786 // In 64-bit mode, rlwinm and friends have a rotation operator that
1787 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1788 // indices of these instructions can only be in the lower 32 bits, so they
1789 // can only represent some 64-bit bit groups. However, when they can be used,
1790 // the 32-bit replication can be used to represent, as a single bit group,
1791 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1792 // groups when possible. Nothing is returned (the function is void); the
// entries of BitGroups are updated, merged and erased in place.
1794 void assignRepl32BitGroups() {
1795 // If we have bits like this:
1797 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1798 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1799 // Groups: | RLAmt = 8 | RLAmt = 40 |
1801 // But, making use of a 32-bit operation that replicates the low-order 32
1802 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
// of 8.
//
// IsAllLow32 inspects every bit position covered by a group and tests, for
// each position that has an underlying value, whether that bit comes from
// index 32 or above of the source value. A wrapped group (StartIdx > EndIdx)
// is scanned as [StartIdx, Bits.size()) followed by [0, EndIdx].
// NOTE(review): the statements executed when a test fires, and the final
// return, are not visible in this listing.
1805 auto IsAllLow32
= [this](BitGroup
& BG
) {
1806 if (BG
.StartIdx
<= BG
.EndIdx
) {
1807 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
) {
1808 if (!Bits
[i
].hasValue())
1810 if (Bits
[i
].getValueBitIndex() >= 32)
1814 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
) {
1815 if (!Bits
[i
].hasValue())
1817 if (Bits
[i
].getValueBitIndex() >= 32)
1820 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
) {
1821 if (!Bits
[i
].hasValue())
1823 if (Bits
[i
].getValueBitIndex() >= 32)
// First pass: decide, group by group, whether it can be treated as a
// replicated-32-bit group.
1831 for (auto &BG
: BitGroups
) {
1832 // If this bit group has RLAmt of 0 and will not be merged with
1833 // another bit group, we don't benefit from Repl32. We don't mark
1834 // such group to give more freedom for later instruction selection.
1835 if (BG
.RLAmt
== 0) {
// A different group with the same value and a rotation of 0 or 32 is a
// potential merge partner.
1836 auto PotentiallyMerged
= [this](BitGroup
& BG
) {
1837 for (auto &BG2
: BitGroups
)
1838 if (&BG
!= &BG2
&& BG
.V
== BG2
.V
&&
1839 (BG2
.RLAmt
== 0 || BG2
.RLAmt
== 32))
1843 if (!PotentiallyMerged(BG
))
// Only groups lying entirely within result bits [0, 31] are candidates.
1846 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32) {
1847 if (IsAllLow32(BG
)) {
// NOTE(review): the updates applied for RLAmt >= 32 and the marking of the
// group are not visible in this listing.
1848 if (BG
.RLAmt
>= 32) {
1855 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
1856 << BG
.V
.getNode() << " RLAmt = " << BG
.RLAmt
<< " ["
1857 << BG
.StartIdx
<< ", " << BG
.EndIdx
<< "]\n");
1862 // Now walk through the bit groups, consolidating where possible.
1863 for (auto I
= BitGroups
.begin(); I
!= BitGroups
.end();) {
1864 // We might want to remove this bit group by merging it with the previous
1865 // group (which might be the ending group).
1866 auto IP
= (I
== BitGroups
.begin()) ?
1867 std::prev(BitGroups
.end()) : std::prev(I
);
// Merge when both groups are Repl32, agree on value and rotation, and this
// group starts on the bit right after the previous one ends (modulo 64).
1868 if (I
->Repl32
&& IP
->Repl32
&& I
->V
== IP
->V
&& I
->RLAmt
== IP
->RLAmt
&&
1869 I
->StartIdx
== (IP
->EndIdx
+ 1) % 64 && I
!= IP
) {
1871 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
1872 << I
->V
.getNode() << " RLAmt = " << I
->RLAmt
<< " ["
1873 << I
->StartIdx
<< ", " << I
->EndIdx
1874 << "] with group with range [" << IP
->StartIdx
<< ", "
1875 << IP
->EndIdx
<< "]\n");
// Extend the previous group over this one, then drop this one. erase()
// returns the iterator to continue from.
1877 IP
->EndIdx
= I
->EndIdx
;
1878 IP
->Repl32CR
= IP
->Repl32CR
|| I
->Repl32CR
;
1879 IP
->Repl32Coalesced
= true;
1880 I
= BitGroups
.erase(I
);
1883 // There is a special case worth handling: If there is a single group
1884 // covering the entire upper 32 bits, and it can be merged with both
1885 // the next and previous groups (which might be the same group), then
1886 // do so. If it is the same group (so there will be only one group in
1887 // total), then we need to reverse the order of the range so that it
1888 // covers the entire 64 bits.
1889 if (I
->StartIdx
== 32 && I
->EndIdx
== 63) {
1890 assert(std::next(I
) == BitGroups
.end() &&
1891 "bit group ends at index 63 but there is another?");
// IN is the group that follows I cyclically, i.e. the first group.
1892 auto IN
= BitGroups
.begin();
// I itself need not be Repl32 here, so its rotation is compared modulo 32.
1894 if (IP
->Repl32
&& IN
->Repl32
&& I
->V
== IP
->V
&& I
->V
== IN
->V
&&
1895 (I
->RLAmt
% 32) == IP
->RLAmt
&& (I
->RLAmt
% 32) == IN
->RLAmt
&&
1896 IP
->EndIdx
== 31 && IN
->StartIdx
== 0 && I
!= IP
&&
1899 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I
->V
.getNode()
1900 << " RLAmt = " << I
->RLAmt
<< " [" << I
->StartIdx
1901 << ", " << I
->EndIdx
1902 << "] with 32-bit replicated groups with ranges ["
1903 << IP
->StartIdx
<< ", " << IP
->EndIdx
<< "] and ["
1904 << IN
->StartIdx
<< ", " << IN
->EndIdx
<< "]\n");
1907 // There is only one other group; change it to cover the whole
1908 // range (backward, so that it can still be Repl32 but cover the
1909 // whole 64-bit range).
1912 IP
->Repl32CR
= IP
->Repl32CR
|| I
->RLAmt
>= 32;
1913 IP
->Repl32Coalesced
= true;
1914 I
= BitGroups
.erase(I
);
1916 // There are two separate groups, one before this group and one
1917 // after us (at the beginning). We're going to remove this group,
1918 // but also the group at the very beginning.
// IP is fully updated before either erase() runs.
1919 IP
->EndIdx
= IN
->EndIdx
;
1920 IP
->Repl32CR
= IP
->Repl32CR
|| IN
->Repl32CR
|| I
->RLAmt
>= 32;
1921 IP
->Repl32Coalesced
= true;
1922 I
= BitGroups
.erase(I
);
1923 BitGroups
.erase(BitGroups
.begin());
1926 // This must be the last group in the vector (and we might have
1927 // just invalidated the iterator above), so break here.
1937 SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
1938 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
// Computes a 64-bit mask from Bits by visiting every bit position and testing
// whether it has an underlying value; bit i of the local Mask is set with
// UINT64_C(1) << i.
// NOTE(review): the statement guarded by the hasValue() test and the return
// expression are not visible in this listing. Callers AND the result into
// the selected value, which suggests the returned mask is clear exactly at
// the positions without a value; confirm against the full source.
1941 uint64_t getZerosMask() {
1943 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1944 if (Bits
[i
].hasValue())
1946 Mask
|= (UINT64_C(1) << i
);
1952 // This method extends an input value to 64 bit if input is 32-bit integer.
1953 // While selecting instructions in BitPermutationSelector in 64-bit mode,
1954 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1955 // In such case, we extend it to 64 bit to be consistent with other values.
//
// V must be 32 or 64 bits wide (asserted below). For a 32-bit V the widening
// is built from an IMPLICIT_DEF node and an INSERT_SUBREG node using the
// PPC::sub_32 sub-register index.
// NOTE(review): the remaining operands of both getMachineNode calls and the
// return statements are not visible in this listing.
1956 SDValue
ExtendToInt64(SDValue V
, const SDLoc
&dl
) {
// Fast path for values that are already 64 bits wide.
1957 if (V
.getValueSizeInBits() == 64)
1960 assert(V
.getValueSizeInBits() == 32);
1961 SDValue SubRegIdx
= CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
1962 SDValue ImDef
= SDValue(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
,
1964 SDValue ExtVal
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
,
1970 SDValue
TruncateToInt32(SDValue V
, const SDLoc
&dl
) {
1971 if (V
.getValueSizeInBits() == 32)
1974 assert(V
.getValueSizeInBits() == 64);
1975 SDValue SubRegIdx
= CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
1976 SDValue SubVal
= SDValue(CurDAG
->getMachineNode(PPC::EXTRACT_SUBREG
, dl
,
1977 MVT::i32
, V
, SubRegIdx
), 0);
1981 // Depending on the number of groups for a particular value, it might be
1982 // better to rotate, mask explicitly (using andi/andis), and then or the
1983 // result. Select this part of the result first.
//
// dl      - debug location attached to every created node.
// Res     - in/out accumulator: the masked value is OR'ed into it, or
//           replaces it when it is still empty.
// InstCnt - optional instruction counter; incremented by the number of
//           instructions emitted here when non-null.
1984 void SelectAndParts32(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
// NOTE(review): the statement guarded by this option test is not visible in
// this listing; presumably the masking strategy is skipped entirely.
1985 if (BPermRewriterNoMasking
)
1988 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
// Visit every result bit, filtering on underlying value and rotation amount.
// NOTE(review): the declaration and update of Mask are not visible here.
1990 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1991 if (!Bits
[i
].hasValue() || Bits
[i
].getValue() != VRI
.V
)
1993 if (RLAmt
[i
] != VRI
.RLAmt
)
1998 // Compute the masks for andi/andis that would be necessary.
// ANDIMask is the low 16 bits of Mask, ANDISMask the bits above them.
1999 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
2000 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2001 "No set bits in mask for value bit groups");
2002 bool NeedsRotate
= VRI
.RLAmt
!= 0;
2004 // We're trying to minimize the number of instructions. If we have one
2005 // group, using one of andi/andis can break even. If we have three
2006 // groups, we can use both andi and andis and break even (to use both
2007 // andi and andis we also need to or the results together). We need four
2008 // groups if we also need to rotate. To use andi/andis we need to do more
2009 // than break even because rotate-and-mask instructions tend to be easier
// to schedule.
2012 // FIXME: We've biased here against using andi/andis, which is right for
2013 // POWER cores, but not optimal everywhere. For example, on the A2,
2014 // andi/andis have single-cycle latency whereas the rotate-and-mask
2015 // instructions take two cycles, and it would be better to bias toward
2016 // andi/andis in break-even cases.
// Cost of the masking approach: an optional rotate, one instruction per
// non-zero half mask, an OR joining the halves when both are used, and an OR
// into Res when Res already holds a value.
2018 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
2019 (unsigned) (ANDIMask
!= 0) +
2020 (unsigned) (ANDISMask
!= 0) +
2021 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0) +
2022 (unsigned) (bool) Res
;
2024 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode()
2025 << " RL: " << VRI
.RLAmt
<< ":"
2026 << "\n\t\t\tisel using masking: " << NumAndInsts
2027 << " using rotates: " << VRI
.NumGroups
<< "\n");
// NOTE(review): the statement guarded by this comparison is not visible;
// presumably masking is abandoned for this entry unless it is strictly
// cheaper than one rotate per group.
2029 if (NumAndInsts
>= VRI
.NumGroups
)
2032 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2034 if (InstCnt
) *InstCnt
+= NumAndInsts
;
// VRot is the 32-bit value to be masked: either an RLWINM over the full
// 0..31 mask range, or the truncated value itself.
2039 { TruncateToInt32(VRI
.V
, dl
), getI32Imm(VRI
.RLAmt
, dl
),
2040 getI32Imm(0, dl
), getI32Imm(31, dl
) };
2041 VRot
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2044 VRot
= TruncateToInt32(VRI
.V
, dl
);
2047 SDValue ANDIVal
, ANDISVal
;
2049 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDI_rec
, dl
, MVT::i32
,
2050 VRot
, getI32Imm(ANDIMask
, dl
)),
2054 SDValue(CurDAG
->getMachineNode(PPC::ANDIS_rec
, dl
, MVT::i32
, VRot
,
2055 getI32Imm(ANDISMask
, dl
)),
// TotalVal becomes whichever half exists, or the OR of both halves.
2060 TotalVal
= ANDISVal
;
2064 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
2065 ANDIVal
, ANDISVal
), 0);
2070 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
2073 // Now, remove all groups with this underlying value and rotation
// factor.
2075 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
2076 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
2081 // Instruction selection for the 32-bit case.
//
// N        - node being selected (not referenced in the visible part of the
//            body).
// LateMask - when true, zero bits are ignored while inserting groups and are
//            masked in at the end instead.
// InstCnt  - optional counter, reset to 0 here and incremented for every
//            instruction this routine accounts for.
// Returns the node of the final result value.
2082 SDNode
*Select32(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
2086 if (InstCnt
) *InstCnt
= 0;
2088 // Take care of cases that should use andi/andis first.
2089 SelectAndParts32(dl
, Res
, InstCnt
);
2091 // If we've not yet selected a 'starting' instruction, and we have no zeros
2092 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2093 // number of groups), and start with this rotated value.
2094 if ((!NeedMask
|| LateMask
) && !Res
) {
// ValueRotsVec is sorted, so its first entry is the preferred one.
2095 ValueRotInfo
&VRI
= ValueRotsVec
[0];
2097 if (InstCnt
) *InstCnt
+= 1;
// Either rotate the whole value with RLWINM over the 0..31 mask range, or
// use the truncated value as-is.
2099 { TruncateToInt32(VRI
.V
, dl
), getI32Imm(VRI
.RLAmt
, dl
),
2100 getI32Imm(0, dl
), getI32Imm(31, dl
) };
2101 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
),
2104 Res
= TruncateToInt32(VRI
.V
, dl
);
2107 // Now, remove all groups with this underlying value and rotation factor.
2108 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
2109 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
// Every remaining group costs exactly one instruction.
2113 if (InstCnt
) *InstCnt
+= BitGroups
.size();
2115 // Insert the other groups (one at a time).
2116 for (auto &BG
: BitGroups
) {
// The instruction's mask operands are Bits.size() - EndIdx - 1 and
// Bits.size() - StartIdx - 1, i.e. the group's range counted from the other
// end. RLWINM produces a fresh value; RLWIMI takes the current Res as its
// first operand.
2119 { TruncateToInt32(BG
.V
, dl
), getI32Imm(BG
.RLAmt
, dl
),
2120 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
2121 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
2122 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
2125 { Res
, TruncateToInt32(BG
.V
, dl
), getI32Imm(BG
.RLAmt
, dl
),
2126 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
2127 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
2128 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
), 0);
// Final masking step: apply the mask from getZerosMask() to Res.
// NOTE(review): the condition guarding this step is not visible here.
2133 unsigned Mask
= (unsigned) getZerosMask();
2135 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
2136 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2137 "No set bits in zeros mask?");
// One instruction per non-zero half mask, plus an OR when both are needed.
2139 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
2140 (unsigned) (ANDISMask
!= 0) +
2141 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
2143 SDValue ANDIVal
, ANDISVal
;
2145 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDI_rec
, dl
, MVT::i32
,
2146 Res
, getI32Imm(ANDIMask
, dl
)),
2150 SDValue(CurDAG
->getMachineNode(PPC::ANDIS_rec
, dl
, MVT::i32
, Res
,
2151 getI32Imm(ANDISMask
, dl
)),
2159 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
2160 ANDIVal
, ANDISVal
), 0);
2163 return Res
.getNode();
// Computes an instruction count for a 64-bit rotate-and-mask with the given
// rotation amount and mask range, without creating any node. IsIns (used
// below) distinguishes the insert form: the mask-touches-either-end case is
// only considered when IsIns is false.
// NOTE(review): the end of the parameter list, the handling of Repl32 and
// the returned values are not visible in this listing; by analogy with
// SelectRotMask64 the result is presumably 1 or 2.
2166 unsigned SelectRotMask64Count(unsigned RLAmt
, bool Repl32
,
2167 unsigned MaskStart
, unsigned MaskEnd
,
2169 // In the notation used by the instructions, 'start' and 'end' are reversed
2170 // because bits are counted from high to low order.
2171 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
2172 InstMaskEnd
= 64 - MaskStart
- 1;
// Either the mask reaches one end of the register (non-insert form only), or
// it ends exactly at bit 63 - RLAmt.
2177 if ((!IsIns
&& (InstMaskEnd
== 63 || InstMaskStart
== 0)) ||
2178 InstMaskEnd
== 63 - RLAmt
)
2184 // For 64-bit values, not all combinations of rotates and masks are
2185 // available. Produce one if it is available.
//
// V                  - value to rotate; widened with ExtendToInt64 as needed.
// RLAmt              - left-rotation amount.
// Repl32             - selects the RLWINM8 form, whose mask indices are the
//                      64-bit ones minus 32.
// MaskStart, MaskEnd - mask range, converted below to the instruction's
//                      reversed numbering.
// InstCnt            - optional counter: +1 for a single instruction, +2 for
//                      the two-instruction fallback.
// Returns the value produced by the last emitted instruction.
2186 SDValue
SelectRotMask64(SDValue V
, const SDLoc
&dl
, unsigned RLAmt
,
2187 bool Repl32
, unsigned MaskStart
, unsigned MaskEnd
,
2188 unsigned *InstCnt
= nullptr) {
2189 // In the notation used by the instructions, 'start' and 'end' are reversed
2190 // because bits are counted from high to low order.
2191 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
2192 InstMaskEnd
= 64 - MaskStart
- 1;
2194 if (InstCnt
) *InstCnt
+= 1;
2197 // This rotation amount assumes that the lower 32 bits of the quantity
2198 // are replicated in the high 32 bits by the rotation operator (which is
2199 // done by rlwinm and friends).
2200 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
2201 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
2203 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2204 getI32Imm(InstMaskStart
- 32, dl
), getI32Imm(InstMaskEnd
- 32, dl
) };
2205 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM8
, dl
, MVT::i64
,
// Mask reaches instruction bit 63: RLDICL, passing the mask start.
2209 if (InstMaskEnd
== 63) {
2211 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2212 getI32Imm(InstMaskStart
, dl
) };
2213 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Ops
), 0);
// Mask begins at instruction bit 0: RLDICR, passing the mask end.
2216 if (InstMaskStart
== 0) {
2218 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2219 getI32Imm(InstMaskEnd
, dl
) };
2220 return SDValue(CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Ops
), 0);
// Mask ends at 63 - RLAmt: RLDIC, passing the mask start.
2223 if (InstMaskEnd
== 63 - RLAmt
) {
2225 { ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2226 getI32Imm(InstMaskStart
, dl
) };
2227 return SDValue(CurDAG
->getMachineNode(PPC::RLDIC
, dl
, MVT::i64
, Ops
), 0);
2230 // We cannot do this with a single instruction, so we'll use two. The
2231 // problem is that we're not free to choose both a rotation amount and mask
2232 // start and end independently. We can choose an arbitrary mask start and
2233 // end, but then the rotation amount is fixed. Rotation, however, can be
2234 // inverted, and so by applying an "inverse" rotation first, we can get the
// desired result.
2236 if (InstCnt
) *InstCnt
+= 1;
2238 // The rotation amount for the second instruction must be MaskStart.
2239 unsigned RLAmt2
= MaskStart
;
2240 // The first instruction must rotate V so that the overall rotation amount
// is RLAmt.
2242 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
// Both instructions were already counted above, so the recursive calls pass
// no InstCnt.
2244 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
2245 return SelectRotMask64(V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
2248 // For 64-bit values, not all combinations of rotates and masks are
2249 // available. Produce a rotate-mask-and-insert if one is available.
//
// Base               - value that receives the inserted bits.
// V                  - value whose rotated bits are inserted.
// RLAmt              - left-rotation amount applied to V.
// Repl32             - selects the RLWIMI8 form, whose mask indices are the
//                      64-bit ones minus 32.
// MaskStart, MaskEnd - mask range, converted below to the instruction's
//                      reversed numbering.
// InstCnt            - optional counter: +1 for a single instruction, +2 for
//                      the two-instruction fallback.
// Returns the value produced by the insert instruction.
2250 SDValue
SelectRotMaskIns64(SDValue Base
, SDValue V
, const SDLoc
&dl
,
2251 unsigned RLAmt
, bool Repl32
, unsigned MaskStart
,
2252 unsigned MaskEnd
, unsigned *InstCnt
= nullptr) {
2253 // In the notation used by the instructions, 'start' and 'end' are reversed
2254 // because bits are counted from high to low order.
2255 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
2256 InstMaskEnd
= 64 - MaskStart
- 1;
2258 if (InstCnt
) *InstCnt
+= 1;
2261 // This rotation amount assumes that the lower 32 bits of the quantity
2262 // are replicated in the high 32 bits by the rotation operator (which is
2263 // done by rlwinm and friends).
2264 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
2265 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
2267 { ExtendToInt64(Base
, dl
), ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2268 getI32Imm(InstMaskStart
- 32, dl
), getI32Imm(InstMaskEnd
- 32, dl
) };
2269 return SDValue(CurDAG
->getMachineNode(PPC::RLWIMI8
, dl
, MVT::i64
,
// The only single-instruction 64-bit insert emitted here is RLDIMI, used
// when the mask ends at 63 - RLAmt.
2273 if (InstMaskEnd
== 63 - RLAmt
) {
2275 { ExtendToInt64(Base
, dl
), ExtendToInt64(V
, dl
), getI32Imm(RLAmt
, dl
),
2276 getI32Imm(InstMaskStart
, dl
) };
2277 return SDValue(CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
), 0);
2280 // We cannot do this with a single instruction, so we'll use two. The
2281 // problem is that we're not free to choose both a rotation amount and mask
2282 // start and end independently. We can choose an arbitrary mask start and
2283 // end, but then the rotation amount is fixed. Rotation, however, can be
2284 // inverted, and so by applying an "inverse" rotation first, we can get the
// desired result.
2286 if (InstCnt
) *InstCnt
+= 1;
2288 // The rotation amount for the second instruction must be MaskStart.
2289 unsigned RLAmt2
= MaskStart
;
2290 // The first instruction must rotate V so that the overall rotation amount
// is RLAmt.
2292 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
// Pre-rotate V over the full 0..63 range, then insert with rotation RLAmt2.
// Both instructions were already counted above, so no InstCnt is passed.
2294 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
2295 return SelectRotMaskIns64(Base
, V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
// 64-bit counterpart of SelectAndParts32: for each (value, rotation) entry in
// ValueRotsVec, compare the cost of rotate-then-AND against the cost of the
// per-group rotates and emit the AND-based form when it is preferred.
//
// dl      - debug location attached to every created node.
// Res     - in/out accumulator; the masked value is OR'ed into it (OR8).
// InstCnt - optional instruction counter; incremented by the number of
//           instructions emitted here when non-null.
2298 void SelectAndParts64(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
// NOTE(review): the statement guarded by this option test is not visible in
// this listing; presumably the masking strategy is skipped entirely.
2299 if (BPermRewriterNoMasking
)
2302 // The idea here is the same as in the 32-bit version, but with additional
2303 // complications from the fact that Repl32 might be true. Because we
2304 // aggressively convert bit groups to Repl32 form (which, for small
2305 // rotation factors, involves no other change), and then coalesce, it might
2306 // be the case that a single 64-bit masking operation could handle both
2307 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2308 // form allowed coalescing, then we must use a 32-bit rotation in order to
2309 // completely capture the new combined bit group.
2311 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
2314 // We need to add to the mask all bits from the associated bit groups.
2315 // If Repl32 is false, we need to add bits from bit groups that have
2316 // Repl32 true, but are trivially convertible to Repl32 false. Such a
2317 // group is trivially convertible if it overlaps only with the lower 32
2318 // bits, and the group has not been coalesced.
// MatchingBG captures VRI by copy and finally compares the group's effective
// rotation amount with VRI.RLAmt.
// NOTE(review): the value comparison and the bodies of the two Repl32
// branches are not visible in this listing.
2319 auto MatchingBG
= [VRI
](const BitGroup
&BG
) {
2323 unsigned EffRLAmt
= BG
.RLAmt
;
2324 if (!VRI
.Repl32
&& BG
.Repl32
) {
2325 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
<= BG
.EndIdx
&&
2326 !BG
.Repl32Coalesced
) {
2332 } else if (VRI
.Repl32
!= BG
.Repl32
) {
2336 return VRI
.RLAmt
== EffRLAmt
;
// Build the mask of result bits covered by the matching groups. A wrapped
// group contributes [StartIdx, Bits.size()) and [0, EndIdx].
2339 for (auto &BG
: BitGroups
) {
2340 if (!MatchingBG(BG
))
2343 if (BG
.StartIdx
<= BG
.EndIdx
) {
2344 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
)
2345 Mask
|= (UINT64_C(1) << i
);
2347 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
)
2348 Mask
|= (UINT64_C(1) << i
);
2349 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
)
2350 Mask
|= (UINT64_C(1) << i
);
2354 // We can use the 32-bit andi/andis technique if the mask does not
2355 // require any higher-order bits. This can save an instruction compared
2356 // to always using the general 64-bit technique.
2357 bool Use32BitInsts
= isUInt
<32>(Mask
);
2358 // Compute the masks for andi/andis that would be necessary.
2359 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
2360 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
// A rotate is needed for a non-zero rotation amount, or for a Repl32 entry
// whose mask has bits above the low 32.
2362 bool NeedsRotate
= VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
));
// Cost of the masking approach: the optional rotate and the OR into an
// existing Res, plus either the andi/andis sequence or materializing the
// 64-bit constant followed by one AND.
2364 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
2365 (unsigned) (bool) Res
;
2366 unsigned NumOfSelectInsts
= 0;
// Called here for its instruction count; the returned node is not used.
2367 selectI64Imm(CurDAG
, dl
, Mask
, &NumOfSelectInsts
);
2368 assert(NumOfSelectInsts
> 0 && "Failed to select an i64 constant.");
2370 NumAndInsts
+= (unsigned) (ANDIMask
!= 0) + (unsigned) (ANDISMask
!= 0) +
2371 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
2373 NumAndInsts
+= NumOfSelectInsts
+ /* and */ 1;
// Cost of the rotate-based alternative over the matching groups.
// NOTE(review): the accumulation into NumRLInsts and the updates of FirstBG
// and MoreBG are not visible in this listing.
2375 unsigned NumRLInsts
= 0;
2376 bool FirstBG
= true;
2377 bool MoreBG
= false;
2378 for (auto &BG
: BitGroups
) {
2379 if (!MatchingBG(BG
)) {
2384 SelectRotMask64Count(BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
, BG
.EndIdx
,
2389 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode()
2390 << " RL: " << VRI
.RLAmt
<< (VRI
.Repl32
? " (32):" : ":")
2391 << "\n\t\t\tisel using masking: " << NumAndInsts
2392 << " using rotates: " << NumRLInsts
<< "\n");
2394 // When we'd use andi/andis, we bias toward using the rotates (andi only
2395 // has a record form, and is cracked on POWER cores). However, when using
2396 // general 64-bit constant formation, bias toward the constant form,
2397 // because that exposes more opportunities for CSE.
2398 if (NumAndInsts
> NumRLInsts
)
2400 // When merging multiple bit groups, instruction or is used.
2401 // But when rotate is used, rldimi can insert the rotated value into any
2402 // register, so instruction or can be avoided.
2403 if ((Use32BitInsts
|| MoreBG
) && NumAndInsts
== NumRLInsts
)
2406 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2408 if (InstCnt
) *InstCnt
+= NumAndInsts
;
2411 // We actually need to generate a rotation if we have a non-zero rotation
2412 // factor or, in the Repl32 case, if we care about any of the
2413 // higher-order replicated bits. In the latter case, we generate a mask
2414 // backward so that it actually includes the entire 64 bits.
2415 if (VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
)))
2416 VRot
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
2417 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63);
2422 if (Use32BitInsts
) {
2423 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2424 "No set bits in mask when using 32-bit ands for 64-bit value");
2426 SDValue ANDIVal
, ANDISVal
;
2428 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDI8_rec
, dl
, MVT::i64
,
2429 ExtendToInt64(VRot
, dl
),
2430 getI32Imm(ANDIMask
, dl
)),
2434 SDValue(CurDAG
->getMachineNode(PPC::ANDIS8_rec
, dl
, MVT::i64
,
2435 ExtendToInt64(VRot
, dl
),
2436 getI32Imm(ANDISMask
, dl
)),
2440 TotalVal
= ANDISVal
;
2444 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2445 ExtendToInt64(ANDIVal
, dl
), ANDISVal
), 0);
// General 64-bit path: materialize the mask constant and AND it with VRot.
2447 TotalVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
2449 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
2450 ExtendToInt64(VRot
, dl
), TotalVal
),
2457 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2458 ExtendToInt64(Res
, dl
), TotalVal
),
2461 // Now, remove all groups with this underlying value and rotation
// factor.
2463 eraseMatchingBitGroups(MatchingBG
);
2467 // Instruction selection for the 64-bit case.
//
// N        - node being selected (not referenced in the visible part of the
//            body).
// LateMask - when true, zero bits are ignored while inserting groups and are
//            masked in at the end instead.
// InstCnt  - optional counter, reset to 0 here and incremented for every
//            instruction this routine accounts for.
// Returns the node of the final result value.
2468 SDNode
*Select64(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
2472 if (InstCnt
) *InstCnt
= 0;
2474 // Take care of cases that should use andi/andis first.
2475 SelectAndParts64(dl
, Res
, InstCnt
);
2477 // If we've not yet selected a 'starting' instruction, and we have no zeros
2478 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2479 // number of groups), and start with this rotated value.
2480 if ((!NeedMask
|| LateMask
) && !Res
) {
2481 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2482 // groups will come first, and so the VRI representing the largest number
2483 // of groups might not be first (it might be the first Repl32 groups).
2484 unsigned MaxGroupsIdx
= 0;
2485 if (!ValueRotsVec
[0].Repl32
) {
// Look for a Repl32 entry and compare its group count with entry 0.
// NOTE(review): the update of MaxGroupsIdx and the loop exit are not visible
// in this listing.
2486 for (unsigned i
= 0, ie
= ValueRotsVec
.size(); i
< ie
; ++i
)
2487 if (ValueRotsVec
[i
].Repl32
) {
2488 if (ValueRotsVec
[i
].NumGroups
> ValueRotsVec
[0].NumGroups
)
2494 ValueRotInfo
&VRI
= ValueRotsVec
[MaxGroupsIdx
];
2495 bool NeedsRotate
= false;
// For a Repl32 entry, scan the groups that match it in value, rotation and
// Repl32 flag.
// NOTE(review): the first branch of this if/else chain and the statements
// that set NeedsRotate are not visible in this listing.
2498 } else if (VRI
.Repl32
) {
2499 for (auto &BG
: BitGroups
) {
2500 if (BG
.V
!= VRI
.V
|| BG
.RLAmt
!= VRI
.RLAmt
||
2501 BG
.Repl32
!= VRI
.Repl32
)
2504 // We don't need a rotate if the bit group is confined to the lower
// 32 bits.
// Note the strict StartIdx < EndIdx test: a single-bit group does not
// satisfy it.
2506 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
< BG
.EndIdx
)
// Rotate the whole value; for Repl32 the backward range 31..30 is passed.
2515 Res
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
2516 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63,
2521 // Now, remove all groups with this underlying value and rotation factor.
2523 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
2524 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
&&
2525 BG
.Repl32
== VRI
.Repl32
;
2529 // Because 64-bit rotates are more flexible than inserts, we might have a
2530 // preference regarding which one we do first (to save one instruction).
// The two counts are compared for each group, and a chosen group that is not
// already first is re-inserted at the front of BitGroups.
// NOTE(review): the guarding condition, the last argument of both count
// calls and the removal of the group from its old position are not visible
// in this listing.
2532 for (auto I
= BitGroups
.begin(), IE
= BitGroups
.end(); I
!= IE
; ++I
) {
2533 if (SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
2535 SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
2537 if (I
!= BitGroups
.begin()) {
2540 BitGroups
.insert(BitGroups
.begin(), BG
);
2547 // Insert the other groups (one at a time).
2548 for (auto &BG
: BitGroups
) {
// SelectRotMask64 produces a fresh value; SelectRotMaskIns64 inserts into
// the current Res. Both update InstCnt themselves.
2550 Res
= SelectRotMask64(BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
,
2551 BG
.EndIdx
, InstCnt
);
2553 Res
= SelectRotMaskIns64(Res
, BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
,
2554 BG
.StartIdx
, BG
.EndIdx
, InstCnt
);
// Final masking step: apply the mask from getZerosMask() to Res.
// NOTE(review): the condition guarding this step is not visible here.
2558 uint64_t Mask
= getZerosMask();
2560 // We can use the 32-bit andi/andis technique if the mask does not
2561 // require any higher-order bits. This can save an instruction compared
2562 // to always using the general 64-bit technique.
2563 bool Use32BitInsts
= isUInt
<32>(Mask
);
2564 // Compute the masks for andi/andis that would be necessary.
2565 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
2566 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
2568 if (Use32BitInsts
) {
2569 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
2570 "No set bits in mask when using 32-bit ands for 64-bit value");
// One instruction per non-zero half mask, plus an OR when both are needed.
2572 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
2573 (unsigned) (ANDISMask
!= 0) +
2574 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
2576 SDValue ANDIVal
, ANDISVal
;
2578 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDI8_rec
, dl
, MVT::i64
,
2579 ExtendToInt64(Res
, dl
),
2580 getI32Imm(ANDIMask
, dl
)),
2584 SDValue(CurDAG
->getMachineNode(PPC::ANDIS8_rec
, dl
, MVT::i64
,
2585 ExtendToInt64(Res
, dl
),
2586 getI32Imm(ANDISMask
, dl
)),
2594 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2595 ExtendToInt64(ANDIVal
, dl
), ANDISVal
), 0);
// General 64-bit path: materialize the mask constant, AND it with Res, and
// count the constant's instructions plus the AND.
2597 unsigned NumOfSelectInsts
= 0;
2599 SDValue(selectI64Imm(CurDAG
, dl
, Mask
, &NumOfSelectInsts
), 0);
2600 Res
= SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
2601 ExtendToInt64(Res
, dl
), MaskVal
),
// NOTE(review): InstCnt may be null (see the guarded updates above); the
// null check for this update is not visible in this listing. Confirm it
// exists.
2604 *InstCnt
+= NumOfSelectInsts
+ /* and */ 1;
2608 return Res
.getNode();
2611 SDNode
*Select(SDNode
*N
, bool LateMask
, unsigned *InstCnt
= nullptr) {
2612 // Fill in BitGroups.
2613 collectBitGroups(LateMask
);
2614 if (BitGroups
.empty())
2617 // For 64-bit values, figure out when we can use 32-bit instructions.
2618 if (Bits
.size() == 64)
2619 assignRepl32BitGroups();
2621 // Fill in ValueRotsVec.
2622 collectValueRotInfo();
2624 if (Bits
.size() == 32) {
2625 return Select32(N
, LateMask
, InstCnt
);
2627 assert(Bits
.size() == 64 && "Not 64 bits here?");
2628 return Select64(N
, LateMask
, InstCnt
);
2634 void eraseMatchingBitGroups(function_ref
<bool(const BitGroup
&)> F
) {
2635 erase_if(BitGroups
, F
);
// Per-result-bit description of the value being selected, indexed by result
// bit position.
2638 SmallVector
<ValueBit
, 64> Bits
;
// Checked by Select32/Select64 and by the public Select entry point to
// decide whether a masking step is needed.
2640 bool NeedMask
= false;
// Rotation amount recorded for each result bit, parallel to Bits.
2641 SmallVector
<unsigned, 64> RLAmt
;
// Groups of bits sharing a value and rotation; rebuilt by collectBitGroups
// and consumed (erased from) during selection.
2643 SmallVector
<BitGroup
, 16> BitGroups
;
// Per-(value, rotation key) summary filled by collectValueRotInfo, and the
// same entries copied into a vector and sorted.
2645 DenseMap
<std::pair
<SDValue
, unsigned>, ValueRotInfo
> ValueRots
;
2646 SmallVector
<ValueRotInfo
, 16> ValueRotsVec
;
// DAG in which all new nodes are created.
2648 SelectionDAG
*CurDAG
= nullptr;
// Constructor taking the SelectionDAG to operate on.
// NOTE(review): the initializer list is not visible in this listing;
// presumably it stores DAG in CurDAG.
2651 BitPermutationSelector(SelectionDAG
*DAG
)
2654 // Here we try to match complex bit permutations into a set of
2655 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2656 // known to produce optimal code for common cases (like i32 byte swapping).
//
// Entry point: analyzes result 0 of N with getValueBits, computes the
// per-bit rotation amounts, and runs the internal Select once or twice
// (early and late masking), comparing instruction counts.
// NOTE(review): the handling of a failed getValueBits and the final return
// statements are not visible in this listing.
2657 SDNode
*Select(SDNode
*N
) {
2660 getValueBits(SDValue(N
, 0), N
->getValueType(0).getSizeInBits());
// Take ownership of the computed per-bit description.
2663 Bits
= std::move(*Result
.second
);
2665 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2666 " selection for: ");
2667 LLVM_DEBUG(N
->dump(CurDAG
));
2669 // Fill in RLAmt and set NeedMask.
2670 computeRotationAmounts();
// Single attempt without late masking.
// NOTE(review): the guarding condition is not visible here; presumably this
// path is taken when no mask is needed.
2673 return Select(N
, false);
2675 // We currently have two techniques for handling results with zeros: early
2676 // masking (the default) and late masking. Late masking is sometimes more
2677 // efficient, but because the structure of the bit groups is different, it
2678 // is hard to tell without generating both and comparing the results. With
2679 // late masking, we ignore zeros in the resulting value when inserting each
2680 // set of bit groups, and then mask in the zeros at the end. With early
2681 // masking, we only insert the non-zero parts of the result at every step.
2683 unsigned InstCnt
= 0, InstCntLateMask
= 0;
2684 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2685 SDNode
*RN
= Select(N
, false, &InstCnt
);
2686 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt
<< " instructions\n");
2688 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2689 SDNode
*RNLM
= Select(N
, true, &InstCntLateMask
);
2690 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2691 << " instructions\n");
// Ties go to early masking.
2693 if (InstCnt
<= InstCntLateMask
) {
2694 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2698 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
// Helper used during instruction selection to handle extension nodes and
// logical operations through tryEXTEND and tryLogicOpOfCompares. The
// constructor asserts that it is only used on targets with 64-bit pointers.
// NOTE(review): the member-function descriptions below paraphrase the
// existing comments and the visible declarations; the definitions are
// outside this listing.
2703 class IntegerCompareEliminator
{
// DAG in which replacement nodes are created.
2704 SelectionDAG
*CurDAG
;
2706 // Conversion type for interpreting results of a 32-bit instruction as
2707 // a 64-bit value or vice versa.
2708 enum ExtOrTruncConversion
{ Ext
, Trunc
};
2710 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2712 // ZExtOrig - use the original condition code, zero-extend value
2713 // ZExtInvert - invert the condition code, zero-extend value
2714 // SExtOrig - use the original condition code, sign-extend value
2715 // SExtInvert - invert the condition code, sign-extend value
2716 enum SetccInGPROpts
{ ZExtOrig
, ZExtInvert
, SExtOrig
, SExtInvert
};
2718 // Comparisons against zero to emit GPR code sequences for. Each of these
2719 // sequences may need to be emitted for two or more equivalent patterns.
2720 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2721 // matters as well as the extension type: sext (-1/0), zext (1/0).
2722 // GEZExt - (zext (LHS >= 0))
2723 // GESExt - (sext (LHS >= 0))
2724 // LEZExt - (zext (LHS <= 0))
2725 // LESExt - (sext (LHS <= 0))
2726 enum ZeroCompare
{ GEZExt
, GESExt
, LEZExt
, LESExt
};
// Entry points reached from Select() below.
2728 SDNode
*tryEXTEND(SDNode
*N
);
2729 SDNode
*tryLogicOpOfCompares(SDNode
*N
);
// Helpers declared here and defined out of line.
2730 SDValue
computeLogicOpInGPR(SDValue LogicOp
);
2731 SDValue
signExtendInputIfNeeded(SDValue Input
);
2732 SDValue
zeroExtendInputIfNeeded(SDValue Input
);
2733 SDValue
addExtOrTrunc(SDValue NatWidthRes
, ExtOrTruncConversion Conv
);
2734 SDValue
getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
// One helper per operand width (32/64) and result extension (ZExt/SExt).
2736 SDValue
get32BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2737 int64_t RHSValue
, SDLoc dl
);
2738 SDValue
get32BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2739 int64_t RHSValue
, SDLoc dl
);
2740 SDValue
get64BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2741 int64_t RHSValue
, SDLoc dl
);
2742 SDValue
get64BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2743 int64_t RHSValue
, SDLoc dl
);
2744 SDValue
getSETCCInGPR(SDValue Compare
, SetccInGPROpts ConvOpts
);
// Stores the DAG and the owning selector; only valid when the target's
// pointer type is 64 bits wide (asserted).
2747 IntegerCompareEliminator(SelectionDAG
*DAG
,
2748 PPCDAGToDAGISel
*Sel
) : CurDAG(DAG
), S(Sel
) {
2749 assert(CurDAG
->getTargetLoweringInfo()
2750 .getPointerTy(CurDAG
->getDataLayout()).getSizeInBits() == 64 &&
2751 "Only expecting to use this on 64 bit targets.");
// Dispatches on the opcode of N, subject to the CmpInGPR option.
// NOTE(review): the statements guarded by the option tests and the
// logical-op case labels are not visible in this listing; presumably the
// guarded statements decline to handle the node.
2753 SDNode
*Select(SDNode
*N
) {
2754 if (CmpInGPR
== ICGPR_None
)
2756 switch (N
->getOpcode()) {
// Zero extensions are filtered out in the sign-extend-only modes.
2758 case ISD::ZERO_EXTEND
:
2759 if (CmpInGPR
== ICGPR_Sext
|| CmpInGPR
== ICGPR_SextI32
||
2760 CmpInGPR
== ICGPR_SextI64
)
// Sign extensions are filtered out in the zero-extend-only modes.
2763 case ISD::SIGN_EXTEND
:
2764 if (CmpInGPR
== ICGPR_Zext
|| CmpInGPR
== ICGPR_ZextI32
||
2765 CmpInGPR
== ICGPR_ZextI64
)
2767 return tryEXTEND(N
);
2771 return tryLogicOpOfCompares(N
);
// Returns true if Opc is one of ISD::AND, ISD::OR or ISD::XOR.
2777 static bool isLogicOp(unsigned Opc
) {
2778 return Opc
== ISD::AND
|| Opc
== ISD::OR
|| Opc
== ISD::XOR
;
2780 // The obvious case for wanting to keep the value in a GPR. Namely, the
2781 // result of the comparison is actually needed in a GPR.
// N must be an ISD::ZERO_EXTEND or ISD::SIGN_EXTEND node (asserted). The
// extended operand is computed in a GPR and, if its width differs from the
// width of N's result, reinterpreted via addExtOrTrunc.
2782 SDNode
*IntegerCompareEliminator::tryEXTEND(SDNode
*N
) {
2783 assert((N
->getOpcode() == ISD::ZERO_EXTEND
||
2784 N
->getOpcode() == ISD::SIGN_EXTEND
) &&
2785 "Expecting a zero/sign extend node!");
2787 // If we are zero-extending the result of a logical operation on i1
2788 // values, we can keep the values in GPRs.
2789 if (isLogicOp(N
->getOperand(0).getOpcode()) &&
2790 N
->getOperand(0).getValueType() == MVT::i1
&&
2791 N
->getOpcode() == ISD::ZERO_EXTEND
)
2792 WideRes
= computeLogicOpInGPR(N
->getOperand(0));
2793 else if (N
->getOperand(0).getOpcode() != ISD::SETCC
)
// The operand is a SETCC: compute it in a GPR, sign-extended for a
// SIGN_EXTEND node and zero-extended otherwise, keeping the original
// condition code.
2797 getSETCCInGPR(N
->getOperand(0),
2798 N
->getOpcode() == ISD::SIGN_EXTEND
?
2799 SetccInGPROpts::SExtOrig
: SetccInGPROpts::ZExtOrig
);
// Record whether the computed value and the requested result are i32.
2805 bool Input32Bit
= WideRes
.getValueType() == MVT::i32
;
2806 bool Output32Bit
= N
->getValueType(0) == MVT::i32
;
// Statistics: exactly one of the two counters is incremented.
2808 NumSextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 1 : 0;
2809 NumZextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 0 : 1;
// Only when the widths differ is a reinterpretation needed: Ext for a 32-bit
// value wanted as 64-bit, Trunc for the opposite direction.
2811 SDValue ConvOp
= WideRes
;
2812 if (Input32Bit
!= Output32Bit
)
2813 ConvOp
= addExtOrTrunc(WideRes
, Input32Bit
? ExtOrTruncConversion::Ext
:
2814 ExtOrTruncConversion::Trunc
);
2815 return ConvOp
.getNode();
2818 // Attempt to perform logical operations on the results of comparisons while
2819 // keeping the values in GPRs. Without doing so, these would end up being
2820 // lowered to CR-logical operations which suffer from significant latency.
// The logical operation is first computed in a GPR by computeLogicOpInGPR;
// a record-form machine node is then created and the i1 result is read back
// from a bit of CR0 through an EXTRACT_SUBREG node, which is returned.
2822 SDNode
*IntegerCompareEliminator::tryLogicOpOfCompares(SDNode
*N
) {
2823 if (N
->getValueType(0) != MVT::i1
)
2825 assert(isLogicOp(N
->getOpcode()) &&
2826 "Expected a logic operation on setcc results.");
2827 SDValue LoweredLogical
= computeLogicOpInGPR(SDValue(N
, 0));
2828 if (!LoweredLogical
)
// computeLogicOpInGPR emits PPC::XORI8 only for a bitwise negation. In that
// case the EQ bit of CR0 is read, otherwise the GT bit.
2832 bool IsBitwiseNegate
= LoweredLogical
.getMachineOpcode() == PPC::XORI8
;
2833 unsigned SubRegToExtract
= IsBitwiseNegate
? PPC::sub_eq
: PPC::sub_gt
;
2834 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
2835 SDValue LHS
= LoweredLogical
.getOperand(0);
2836 SDValue RHS
= LoweredLogical
.getOperand(1);
2838 SDValue OpToConvToRecForm
;
2840 // Look through any 32-bit to 64-bit implicit extend nodes to find the
2841 // opcode that is input to the XORI.
2842 if (IsBitwiseNegate
&&
2843 LoweredLogical
.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG
)
2844 OpToConvToRecForm
= LoweredLogical
.getOperand(0).getOperand(1);
2845 else if (IsBitwiseNegate
)
2846 // If the input to the XORI isn't an extension, that's what we're after.
2847 OpToConvToRecForm
= LoweredLogical
.getOperand(0);
2849 // If this is not an XORI, it is a reg-reg logical op and we can convert
2850 // it to record-form.
2851 OpToConvToRecForm
= LoweredLogical
;
2853 // Get the record-form version of the node we're looking to use to get the
// CR result from. A value of -1 is treated below as "no record form".
2855 uint16_t NonRecOpc
= OpToConvToRecForm
.getMachineOpcode();
2856 int NewOpc
= PPCInstrInfo::getRecordFormOpcode(NonRecOpc
);
2858 // Convert the right node to record-form. This is either the logical we're
2859 // looking at or it is the input node to the negation (if we're looking at
2860 // a bitwise negation).
2861 if (NewOpc
!= -1 && IsBitwiseNegate
) {
2862 // The input to the XORI has a record-form. Use it.
2863 assert(LoweredLogical
.getConstantOperandVal(1) == 1 &&
2864 "Expected a PPC::XORI8 only for bitwise negation.");
2865 // Emit the record-form instruction.
// It receives the same operands as the node it replaces and additionally
// produces a glue result.
2866 std::vector
<SDValue
> Ops
;
2867 for (int i
= 0, e
= OpToConvToRecForm
.getNumOperands(); i
< e
; i
++)
2868 Ops
.push_back(OpToConvToRecForm
.getOperand(i
));
2871 SDValue(CurDAG
->getMachineNode(NewOpc
, dl
,
2872 OpToConvToRecForm
.getValueType(),
2873 MVT::Glue
, Ops
), 0);
2875 assert((NewOpc
!= -1 || !IsBitwiseNegate
) &&
2876 "No record form available for AND8/OR8/XOR8?");
// PPC::ANDI8_rec is used when no record form was found (NewOpc == -1).
2878 SDValue(CurDAG
->getMachineNode(NewOpc
== -1 ? PPC::ANDI8_rec
: NewOpc
,
2879 dl
, MVT::i64
, MVT::Glue
, LHS
, RHS
),
2883 // Select this node to a single bit from CR0 set by the record-form node
2884 // just created. For bitwise negation, use the EQ bit which is the equivalent
2885 // of negating the result (i.e. it is a bit set when the result of the
2886 // operation is zero).
2888 CurDAG
->getTargetConstant(SubRegToExtract
, dl
, MVT::i32
);
// The glue result (value 1) of the record-form node is passed as an operand.
2890 SDValue(CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
, dl
,
2891 MVT::i1
, CR0Reg
, SRIdxVal
,
2892 WideOp
.getValue(1)), 0);
2893 return CRBit
.getNode();
2896 // Lower a logical operation on i1 values into a GPR sequence if possible.
2897 // The result can be kept in a GPR if requested.
2898 // Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
2901 // - Logical operation (AND/OR/XOR)
2902 // There is also a special case that is handled (namely a complement operation
2903 // achieved with xor %a, -1).
// LogicOp must be an AND/OR/XOR producing an i1 (both asserted). The result
// node is created with type i64.
2904 SDValue
IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp
) {
2905 assert(isLogicOp(LogicOp
.getOpcode()) &&
2906 "Can only handle logic operations here.");
2907 assert(LogicOp
.getValueType() == MVT::i1
&&
2908 "Can only handle logic operations on i1 values here.");
2912 // Special case: xor %a, -1
2913 bool IsBitwiseNegation
= isBitwiseNot(LogicOp
);
2915 // Produces a GPR sequence for each operand of the binary logic operation.
2916 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2917 // the value in a GPR and for logic operations, it will recursively produce
2918 // a GPR sequence for the operation.
2919 auto getLogicOperand
= [&] (SDValue Operand
) -> SDValue
{
2920 unsigned OperandOpcode
= Operand
.getOpcode();
2921 if (OperandOpcode
== ISD::SETCC
)
2922 return getSETCCInGPR(Operand
, SetccInGPROpts::ZExtOrig
);
2923 else if (OperandOpcode
== ISD::TRUNCATE
) {
// The truncate's input is rotated by 0 with a mask starting at bit 63, in
// the 32-bit or 64-bit flavour of RLDICL depending on the input type.
2924 SDValue InputOp
= Operand
.getOperand(0);
2925 EVT InVT
= InputOp
.getValueType();
2926 return SDValue(CurDAG
->getMachineNode(InVT
== MVT::i32
? PPC::RLDICL_32
:
2927 PPC::RLDICL
, dl
, InVT
, InputOp
,
2928 S
->getI64Imm(0, dl
),
2929 S
->getI64Imm(63, dl
)), 0);
2930 } else if (isLogicOp(OperandOpcode
))
2931 return computeLogicOpInGPR(Operand
);
2934 LHS
= getLogicOperand(LogicOp
.getOperand(0));
2935 RHS
= getLogicOperand(LogicOp
.getOperand(1));
2937 // If a GPR sequence can't be produced for the LHS we can't proceed.
2938 // Not producing a GPR sequence for the RHS is only a problem if this isn't
2939 // a bitwise negation operation.
2940 if (!LHS
|| (!RHS
&& !IsBitwiseNegation
))
2943 NumLogicOpsOnComparison
++;
2945 // We will use the inputs as 64-bit values.
2946 if (LHS
.getValueType() == MVT::i32
)
2947 LHS
= addExtOrTrunc(LHS
, ExtOrTruncConversion::Ext
);
2948 if (!IsBitwiseNegation
&& RHS
.getValueType() == MVT::i32
)
2949 RHS
= addExtOrTrunc(RHS
, ExtOrTruncConversion::Ext
);
// Map the generic opcode onto the 64-bit register-register PPC opcode.
2952 switch (LogicOp
.getOpcode()) {
2953 default: llvm_unreachable("Unknown logic operation.");
2954 case ISD::AND
: NewOpc
= PPC::AND8
; break;
2955 case ISD::OR
: NewOpc
= PPC::OR8
; break;
2956 case ISD::XOR
: NewOpc
= PPC::XOR8
; break;
// A bitwise negation is emitted as XORI8 with the immediate 1; the RHS
// computed above is replaced by that immediate.
2959 if (IsBitwiseNegation
) {
2960 RHS
= S
->getI64Imm(1, dl
);
2961 NewOpc
= PPC::XORI8
;
2964 return SDValue(CurDAG
->getMachineNode(NewOpc
, dl
, MVT::i64
, LHS
, RHS
), 0);
2968 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2969 /// Otherwise just reinterpret it as a 64-bit value.
2970 /// Useful when emitting comparison code for 32-bit values without using
2971 /// the compare instruction (which only considers the lower 32-bits).
/// \p Input must have type i32 (asserted). The returned value is either the
/// result of addExtOrTrunc(Input, Ext) or a PPC::EXTSW_32_64 node of type i64.
2972 SDValue
IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input
) {
2973 assert(Input
.getValueType() == MVT::i32
&&
2974 "Can only sign-extend 32-bit values here.");
2975 unsigned Opc
= Input
.getOpcode();
2977 // The value was sign extended and then truncated to 32-bits. No need to
2978 // sign extend it again.
2979 if (Opc
== ISD::TRUNCATE
&&
2980 (Input
.getOperand(0).getOpcode() == ISD::AssertSext
||
2981 Input
.getOperand(0).getOpcode() == ISD::SIGN_EXTEND
))
2982 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2984 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
2985 // The input is a sign-extending load. All ppc sign-extending loads
2986 // sign-extend to the full 64-bits.
2987 if (InputLoad
&& InputLoad
->getExtensionType() == ISD::SEXTLOAD
)
2988 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2990 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
2991 // We don't sign-extend constants.
2993 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
// None of the shortcuts applied: count the extension and emit an explicit
// sign-extend-word node.
2996 SignExtensionsAdded
++;
2997 return SDValue(CurDAG
->getMachineNode(PPC::EXTSW_32_64
, dl
,
2998 MVT::i64
, Input
), 0);
3001 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3002 /// Otherwise just reinterpret it as a 64-bit value.
3003 /// Useful when emitting comparison code for 32-bit values without using
3004 /// the compare instruction (which only considers the lower 32-bits).
/// \p Input must have type i32 (asserted). The returned value is either the
/// result of addExtOrTrunc(Input, Ext) or a PPC::RLDICL_32_64 node of type
/// i64.
3005 SDValue
IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input
) {
3006 assert(Input
.getValueType() == MVT::i32
&&
3007 "Can only zero-extend 32-bit values here.");
3008 unsigned Opc
= Input
.getOpcode();
3010 // The only conditions under which we can omit the actual extend instruction:
3011 // - The value is a non-negative constant
3012 // - The value comes from a load that isn't a sign-extending load
3013 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3014 bool IsTruncateOfZExt
= Opc
== ISD::TRUNCATE
&&
3015 (Input
.getOperand(0).getOpcode() == ISD::AssertZext
||
3016 Input
.getOperand(0).getOpcode() == ISD::ZERO_EXTEND
);
3017 if (IsTruncateOfZExt
)
3018 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
// A constant whose sign-extended value is >= 0 needs no explicit extension.
3020 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
3021 if (InputConst
&& InputConst
->getSExtValue() >= 0)
3022 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
3024 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
3025 // The input is a load that doesn't sign-extend (it will be zero-extended).
3026 if (InputLoad
&& InputLoad
->getExtensionType() != ISD::SEXTLOAD
)
3027 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
3029 // None of the above, need to zero-extend.
// Emitted as a rotate by 0 with a mask starting at bit 32.
3031 ZeroExtensionsAdded
++;
3032 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL_32_64
, dl
, MVT::i64
, Input
,
3033 S
->getI64Imm(0, dl
),
3034 S
->getI64Imm(32, dl
)), 0);
3037 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3038 // course not actual zero/sign extensions that will generate machine code,
3039 // they're just a way to reinterpret a 32 bit value in a register as a
3040 // 64 bit value and vice-versa.
// Conv == Ext yields an i64 INSERT_SUBREG node, Conv == Trunc yields an i32
// EXTRACT_SUBREG node; both use the PPC::sub_32 sub-register index.
3041 SDValue
IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes
,
3042 ExtOrTruncConversion Conv
) {
3043 SDLoc
dl(NatWidthRes
);
3045 // For reinterpreting 32-bit values as 64 bit values, we generate
3046 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3047 if (Conv
== ExtOrTruncConversion::Ext
) {
// The upper part comes from an IMPLICIT_DEF, i.e. it is left undefined.
3048 SDValue
ImDef(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
, MVT::i64
), 0);
3050 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
3051 return SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
, MVT::i64
,
3052 ImDef
, NatWidthRes
, SubRegIdx
), 0);
// Ext was handled above, so only Trunc may reach this point.
3055 assert(Conv
== ExtOrTruncConversion::Trunc
&&
3056 "Unknown convertion between 32 and 64 bit values.");
3057 // For reinterpreting 64-bit values as 32-bit values, we just need to
3058 // EXTRACT_SUBREG (i.e. extract the low word).
3060 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
3061 return SDValue(CurDAG
->getMachineNode(PPC::EXTRACT_SUBREG
, dl
, MVT::i32
,
3062 NatWidthRes
, SubRegIdx
), 0);
3065 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3066 // Handle both zero-extensions and sign-extensions.
// The function works in two steps: it first builds an intermediate value
// (ToExtend) for the GE or LE flavour of CmpTy, and then applies the zero- or
// sign-extending final instruction, which depends on whether LHS is i32.
3068 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
3069 ZeroCompare CmpTy
) {
3070 EVT InVT
= LHS
.getValueType();
3071 bool Is32Bit
= InVT
== MVT::i32
;
3074 // Produce the value that needs to be either zero or sign extended.
3076 case ZeroCompare::GEZExt
:
3077 case ZeroCompare::GESExt
:
// >= 0: complement LHS (NOR of LHS with itself) in its own width.
3078 ToExtend
= SDValue(CurDAG
->getMachineNode(Is32Bit
? PPC::NOR
: PPC::NOR8
,
3079 dl
, InVT
, LHS
, LHS
), 0);
3081 case ZeroCompare::LEZExt
:
3082 case ZeroCompare::LESExt
: {
3084 // Upper 32 bits cannot be undefined for this sequence.
3085 LHS
= signExtendInputIfNeeded(LHS
);
// Negate the 64-bit value and rotate by 1 with a mask starting at bit 63.
3087 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
3089 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3090 Neg
, S
->getI64Imm(1, dl
),
3091 S
->getI64Imm(63, dl
)), 0);
// Add -1 to LHS; the sum is an input of the OR8 node created below.
3094 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3095 S
->getI64Imm(~0ULL, dl
)), 0);
3096 ToExtend
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
3103 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3105 (CmpTy
== ZeroCompare::GEZExt
|| CmpTy
== ZeroCompare::LEZExt
))
// Zero-extended result: rotate by 1 with a mask starting at bit 63.
3106 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3107 ToExtend
, S
->getI64Imm(1, dl
),
3108 S
->getI64Imm(63, dl
)), 0);
3110 (CmpTy
== ZeroCompare::GESExt
|| CmpTy
== ZeroCompare::LESExt
))
// Sign-extended result: arithmetic shift right by 63.
3111 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, ToExtend
,
3112 S
->getI64Imm(63, dl
)), 0);
3114 assert(Is32Bit
&& "Should have handled the 32-bit sequences above.");
3115 // For 32-bit sequences, the extensions differ between GE/LE cases.
3117 case ZeroCompare::GEZExt
: {
3118 SDValue ShiftOps
[] = { ToExtend
, S
->getI32Imm(1, dl
), S
->getI32Imm(31, dl
),
3119 S
->getI32Imm(31, dl
) };
3120 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
3123 case ZeroCompare::GESExt
:
3124 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, ToExtend
,
3125 S
->getI32Imm(31, dl
)), 0);
// The LE intermediate value was built as a 64-bit value above, so the LE
// results use 64-bit opcodes even for an i32 input.
3126 case ZeroCompare::LEZExt
:
3127 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, ToExtend
,
3128 S
->getI32Imm(1, dl
)), 0);
3129 case ZeroCompare::LESExt
:
3130 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, ToExtend
,
3131 S
->getI32Imm(-1, dl
)), 0);
3134 // The switch above covers all the enumerators, so it has no default clause
3135 // (this avoids compiler warnings).
3136 llvm_unreachable("Unknown zero-comparison type.");
3139 /// Produces a zero-extended result of comparing two 32-bit values according to
3140 /// the passed condition code.
/// \p RHSValue is tested against 0, 1 and -1 to select special-case
/// sequences. The default case of the condition-code dispatch returns an
/// empty SDValue. Several sequences operate on 64-bit values and therefore
/// first extend both inputs.
3142 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS
, SDValue RHS
,
3144 int64_t RHSValue
, SDLoc dl
) {
3145 if (CmpInGPR
== ICGPR_I64
|| CmpInGPR
== ICGPR_SextI64
||
3146 CmpInGPR
== ICGPR_ZextI64
|| CmpInGPR
== ICGPR_Sext
)
3148 bool IsRHSZero
= RHSValue
== 0;
3149 bool IsRHSOne
= RHSValue
== 1;
3150 bool IsRHSNegOne
= RHSValue
== -1LL;
3152 default: return SDValue();
3154 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3155 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3156 SDValue Xor
= IsRHSZero
? LHS
:
3157 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
3159 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
// RLWINM operands: rotate left by 27 (i.e. shift right by 5), mask bits 5-31.
3160 SDValue ShiftOps
[] = { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
),
3161 S
->getI32Imm(31, dl
) };
3162 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
3166 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3167 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3168 SDValue Xor
= IsRHSZero
? LHS
:
3169 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
3171 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
3172 SDValue ShiftOps
[] = { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
),
3173 S
->getI32Imm(31, dl
) };
3175 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
3176 return SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
3177 S
->getI32Imm(1, dl
)), 0);
3180 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3181 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3183 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3185 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3186 // by swapping inputs and falling through.
// IsRHSZero is recomputed because RHS now refers to the former LHS.
3187 std::swap(LHS
, RHS
);
3188 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3189 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3193 if (CmpInGPR
== ICGPR_NonExtIn
)
3195 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3196 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3198 if (CmpInGPR
== ICGPR_NonExtIn
)
3200 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3203 // The upper 32-bits of the register can't be undefined for this sequence.
3204 LHS
= signExtendInputIfNeeded(LHS
);
3205 RHS
= signExtendInputIfNeeded(RHS
);
3207 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3209 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Sub
,
3210 S
->getI64Imm(1, dl
), S
->getI64Imm(63, dl
)),
3213 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
,
3214 MVT::i64
, Shift
, S
->getI32Imm(1, dl
)), 0);
3217 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3218 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3219 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3220 // Handle SETGT -1 (which is equivalent to SETGE 0).
3222 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3225 if (CmpInGPR
== ICGPR_NonExtIn
)
3227 // The upper 32-bits of the register can't be undefined for this sequence.
3228 LHS
= signExtendInputIfNeeded(LHS
);
3229 RHS
= signExtendInputIfNeeded(RHS
);
3231 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
3232 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3233 Neg
, S
->getI32Imm(1, dl
), S
->getI32Imm(63, dl
)), 0);
3235 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3236 // (%b < %a) by swapping inputs and falling through.
// The zero/one flags are recomputed for the new RHS after the swap.
3237 std::swap(LHS
, RHS
);
3238 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3239 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3240 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3244 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3245 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3246 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3247 // Handle SETLT 1 (which is equivalent to SETLE 0).
3249 if (CmpInGPR
== ICGPR_NonExtIn
)
3251 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
// RLWINM operands: rotate left by 1, mask bits 31-31.
3255 SDValue ShiftOps
[] = { LHS
, S
->getI32Imm(1, dl
), S
->getI32Imm(31, dl
),
3256 S
->getI32Imm(31, dl
) };
3257 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
3261 if (CmpInGPR
== ICGPR_NonExtIn
)
3263 // The upper 32-bits of the register can't be undefined for this sequence.
3264 LHS
= signExtendInputIfNeeded(LHS
);
3265 RHS
= signExtendInputIfNeeded(RHS
);
3267 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3268 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3269 SUBFNode
, S
->getI64Imm(1, dl
),
3270 S
->getI64Imm(63, dl
)), 0);
3273 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3274 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3275 std::swap(LHS
, RHS
);
3278 if (CmpInGPR
== ICGPR_NonExtIn
)
3280 // The upper 32-bits of the register can't be undefined for this sequence.
// Unsigned comparisons zero-extend their inputs instead of sign-extending.
3281 LHS
= zeroExtendInputIfNeeded(LHS
);
3282 RHS
= zeroExtendInputIfNeeded(RHS
);
3284 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3286 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3287 Subtract
, S
->getI64Imm(1, dl
),
3288 S
->getI64Imm(63, dl
)), 0);
3289 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, SrdiNode
,
3290 S
->getI32Imm(1, dl
)), 0);
3293 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3294 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3295 std::swap(LHS
, RHS
);
3298 if (CmpInGPR
== ICGPR_NonExtIn
)
3300 // The upper 32-bits of the register can't be undefined for this sequence.
3301 LHS
= zeroExtendInputIfNeeded(LHS
);
3302 RHS
= zeroExtendInputIfNeeded(RHS
);
3304 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3305 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3306 Subtract
, S
->getI64Imm(1, dl
),
3307 S
->getI64Imm(63, dl
)), 0);
3312 /// Produces a sign-extended result of comparing two 32-bit values according to
3313 /// the passed condition code.
/// \p RHSValue is tested against 0, 1 and -1 to select special-case
/// sequences. The default case of the condition-code dispatch returns an
/// empty SDValue. Several sequences operate on 64-bit values and therefore
/// first extend both inputs.
3315 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS
, SDValue RHS
,
3317 int64_t RHSValue
, SDLoc dl
) {
3318 if (CmpInGPR
== ICGPR_I64
|| CmpInGPR
== ICGPR_SextI64
||
3319 CmpInGPR
== ICGPR_ZextI64
|| CmpInGPR
== ICGPR_Zext
)
3321 bool IsRHSZero
= RHSValue
== 0;
3322 bool IsRHSOne
= RHSValue
== 1;
3323 bool IsRHSNegOne
= RHSValue
== -1LL;
3326 default: return SDValue();
3328 // (sext (setcc %a, %b, seteq)) ->
3329 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3330 // (sext (setcc %a, 0, seteq)) ->
3331 // (ashr (shl (ctlz %a), 58), 63)
3332 SDValue CountInput
= IsRHSZero
? LHS
:
3333 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
3335 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, CountInput
), 0);
// The emitted nodes are CNTLZW, RLWINM (rotate 27, mask bits 5-31) and NEG.
3336 SDValue SHLOps
[] = { Cntlzw
, S
->getI32Imm(27, dl
),
3337 S
->getI32Imm(5, dl
), S
->getI32Imm(31, dl
) };
3339 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, SHLOps
), 0);
3340 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Slwi
), 0);
3343 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3344 // flip the bit, finally take 2's complement.
3345 // (sext (setcc %a, %b, setne)) ->
3346 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3347 // Same as above, but the first xor is not needed.
3348 // (sext (setcc %a, 0, setne)) ->
3349 // (neg (xor (lshr (ctlz %a), 5), 1))
3350 SDValue Xor
= IsRHSZero
? LHS
:
3351 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
3353 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
3354 SDValue ShiftOps
[] =
3355 { Clz
, S
->getI32Imm(27, dl
), S
->getI32Imm(5, dl
), S
->getI32Imm(31, dl
) };
3357 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
3359 SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
3360 S
->getI32Imm(1, dl
)), 0);
3361 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Xori
), 0);
3364 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3365 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3367 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3369 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3370 // by swapping inputs and falling through.
// IsRHSZero is recomputed because RHS now refers to the former LHS.
3371 std::swap(LHS
, RHS
);
3372 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3373 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3377 if (CmpInGPR
== ICGPR_NonExtIn
)
3379 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3380 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3382 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3384 // The upper 32-bits of the register can't be undefined for this sequence.
3385 LHS
= signExtendInputIfNeeded(LHS
);
3386 RHS
= signExtendInputIfNeeded(RHS
);
3388 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, MVT::Glue
,
3391 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3392 SUBFNode
, S
->getI64Imm(1, dl
),
3393 S
->getI64Imm(63, dl
)), 0);
3394 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Srdi
,
3395 S
->getI32Imm(-1, dl
)), 0);
3398 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3399 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3400 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3402 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3404 if (CmpInGPR
== ICGPR_NonExtIn
)
3406 // The upper 32-bits of the register can't be undefined for this sequence.
3407 LHS
= signExtendInputIfNeeded(LHS
);
3408 RHS
= signExtendInputIfNeeded(RHS
);
3410 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
3411 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, Neg
,
3412 S
->getI64Imm(63, dl
)), 0);
3414 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3415 // (%b < %a) by swapping inputs and falling through.
// The zero/one flags are recomputed for the new RHS after the swap.
3416 std::swap(LHS
, RHS
);
3417 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3418 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3419 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3423 // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3424 // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3425 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
3427 if (CmpInGPR
== ICGPR_NonExtIn
)
3429 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3432 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, LHS
,
3433 S
->getI32Imm(31, dl
)), 0);
3435 if (CmpInGPR
== ICGPR_NonExtIn
)
3437 // The upper 32-bits of the register can't be undefined for this sequence.
3438 LHS
= signExtendInputIfNeeded(LHS
);
3439 RHS
= signExtendInputIfNeeded(RHS
);
3441 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3442 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3443 SUBFNode
, S
->getI64Imm(63, dl
)), 0);
3446 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3447 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3448 std::swap(LHS
, RHS
);
3451 if (CmpInGPR
== ICGPR_NonExtIn
)
3453 // The upper 32-bits of the register can't be undefined for this sequence.
// Unsigned comparisons zero-extend their inputs instead of sign-extending.
3454 LHS
= zeroExtendInputIfNeeded(LHS
);
3455 RHS
= zeroExtendInputIfNeeded(RHS
);
3457 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3459 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Subtract
,
3460 S
->getI32Imm(1, dl
), S
->getI32Imm(63,dl
)),
3462 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Shift
,
3463 S
->getI32Imm(-1, dl
)), 0);
3466 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3467 // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3468 std::swap(LHS
, RHS
);
3471 if (CmpInGPR
== ICGPR_NonExtIn
)
3473 // The upper 32-bits of the register can't be undefined for this sequence.
3474 LHS
= zeroExtendInputIfNeeded(LHS
);
3475 RHS
= zeroExtendInputIfNeeded(RHS
);
3477 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, RHS
, LHS
), 0);
3478 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3479 Subtract
, S
->getI64Imm(63, dl
)), 0);
3484 /// Produces a zero-extended result of comparing two 64-bit values according to
3485 /// the passed condition code.
3487 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS
, SDValue RHS
,
3489 int64_t RHSValue
, SDLoc dl
) {
3490 if (CmpInGPR
== ICGPR_I32
|| CmpInGPR
== ICGPR_SextI32
||
3491 CmpInGPR
== ICGPR_ZextI32
|| CmpInGPR
== ICGPR_Sext
)
3493 bool IsRHSZero
= RHSValue
== 0;
3494 bool IsRHSOne
= RHSValue
== 1;
3495 bool IsRHSNegOne
= RHSValue
== -1LL;
3497 default: return SDValue();
3499 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3500 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3501 SDValue Xor
= IsRHSZero
? LHS
:
3502 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3504 SDValue(CurDAG
->getMachineNode(PPC::CNTLZD
, dl
, MVT::i64
, Xor
), 0);
3505 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Clz
,
3506 S
->getI64Imm(58, dl
),
3507 S
->getI64Imm(63, dl
)), 0);
3510 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3511 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3512 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3513 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3514 SDValue Xor
= IsRHSZero
? LHS
:
3515 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3517 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3518 Xor
, S
->getI32Imm(~0U, dl
)), 0);
3519 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, AC
,
3520 Xor
, AC
.getValue(1)), 0);
3523 // {subc.reg, subc.CA} = (subcarry %a, %b)
3524 // (zext (setcc %a, %b, setge)) ->
3525 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3526 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3528 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3529 std::swap(LHS
, RHS
);
3530 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3531 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3535 // {subc.reg, subc.CA} = (subcarry %b, %a)
3536 // (zext (setcc %a, %b, setle)) ->
3537 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3538 // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3540 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3542 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3543 S
->getI64Imm(1, dl
),
3544 S
->getI64Imm(63, dl
)), 0);
3546 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, RHS
,
3547 S
->getI64Imm(63, dl
)), 0);
3548 SDValue SubtractCarry
=
3549 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3551 return SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3552 ShiftR
, ShiftL
, SubtractCarry
), 0);
3555 // {subc.reg, subc.CA} = (subcarry %b, %a)
3556 // (zext (setcc %a, %b, setgt)) ->
3557 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3558 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3560 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
3563 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3564 S
->getI64Imm(~0ULL, dl
)), 0);
3566 SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
, Addi
, LHS
), 0);
3567 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Nor
,
3568 S
->getI64Imm(1, dl
),
3569 S
->getI64Imm(63, dl
)), 0);
3571 std::swap(LHS
, RHS
);
3572 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3573 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3574 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3578 // {subc.reg, subc.CA} = (subcarry %a, %b)
3579 // (zext (setcc %a, %b, setlt)) ->
3580 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3581 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3583 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
3585 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3586 S
->getI64Imm(1, dl
),
3587 S
->getI64Imm(63, dl
)), 0);
3589 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3590 LHS
, S
->getI64Imm(63, dl
)), 0);
3592 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3593 RHS
, S
->getI64Imm(1, dl
),
3594 S
->getI64Imm(63, dl
)), 0);
3595 SDValue SUBFC8Carry
=
3596 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3599 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3600 SRDINode
, SRADINode
, SUBFC8Carry
), 0);
3601 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3602 ADDE8Node
, S
->getI64Imm(1, dl
)), 0);
3605 // {subc.reg, subc.CA} = (subcarry %a, %b)
3606 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3607 std::swap(LHS
, RHS
);
3610 // {subc.reg, subc.CA} = (subcarry %b, %a)
3611 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3612 SDValue SUBFC8Carry
=
3613 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3615 SDValue SUBFE8Node
=
3616 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, MVT::Glue
,
3617 LHS
, LHS
, SUBFC8Carry
), 0);
3618 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
,
3619 SUBFE8Node
, S
->getI64Imm(1, dl
)), 0);
3622 // {subc.reg, subc.CA} = (subcarry %b, %a)
3623 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3624 std::swap(LHS
, RHS
);
3627 // {subc.reg, subc.CA} = (subcarry %a, %b)
3628 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3629 SDValue SubtractCarry
=
3630 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3633 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
,
3634 LHS
, LHS
, SubtractCarry
), 0);
3635 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
,
3641 /// Produces a sign-extended result of comparing two 64-bit values according to
3642 /// the passed condition code.
3644 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS
, SDValue RHS
,
3646 int64_t RHSValue
, SDLoc dl
) {
3647 if (CmpInGPR
== ICGPR_I32
|| CmpInGPR
== ICGPR_SextI32
||
3648 CmpInGPR
== ICGPR_ZextI32
|| CmpInGPR
== ICGPR_Zext
)
3650 bool IsRHSZero
= RHSValue
== 0;
3651 bool IsRHSOne
= RHSValue
== 1;
3652 bool IsRHSNegOne
= RHSValue
== -1LL;
3654 default: return SDValue();
3656 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3657 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3658 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3659 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3660 SDValue AddInput
= IsRHSZero
? LHS
:
3661 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3663 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3664 AddInput
, S
->getI32Imm(~0U, dl
)), 0);
3665 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, Addic
,
3666 Addic
, Addic
.getValue(1)), 0);
3669 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3670 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3671 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3672 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3673 SDValue Xor
= IsRHSZero
? LHS
:
3674 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
3676 SDValue(CurDAG
->getMachineNode(PPC::SUBFIC8
, dl
, MVT::i64
, MVT::Glue
,
3677 Xor
, S
->getI32Imm(0, dl
)), 0);
3678 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, SC
,
3679 SC
, SC
.getValue(1)), 0);
3682 // {subc.reg, subc.CA} = (subcarry %a, %b)
3683 // (sext (setcc %a, %b, setge)) ->
3684 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3685 // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3687 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3688 std::swap(LHS
, RHS
);
3689 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3690 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3694 // {subc.reg, subc.CA} = (subcarry %b, %a)
3695 // (sext (setcc %a, %b, setle)) ->
3696 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3697 // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3699 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3701 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, RHS
,
3702 S
->getI64Imm(63, dl
)), 0);
3704 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, LHS
,
3705 S
->getI64Imm(1, dl
),
3706 S
->getI64Imm(63, dl
)), 0);
3707 SDValue SubtractCarry
=
3708 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3711 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
, MVT::Glue
,
3712 ShiftR
, ShiftL
, SubtractCarry
), 0);
3713 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, Adde
), 0);
3716 // {subc.reg, subc.CA} = (subcarry %b, %a)
3717 // (sext (setcc %a, %b, setgt)) ->
3718 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3719 // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3721 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
3724 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
3725 S
->getI64Imm(-1, dl
)), 0);
3727 SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
, Add
, LHS
), 0);
3728 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, Nor
,
3729 S
->getI64Imm(63, dl
)), 0);
3731 std::swap(LHS
, RHS
);
3732 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3733 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
3734 IsRHSOne
= RHSConst
&& RHSConst
->getSExtValue() == 1;
3738 // {subc.reg, subc.CA} = (subcarry %a, %b)
3739 // (sext (setcc %a, %b, setlt)) ->
3740 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3741 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3743 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
3745 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, LHS
,
3746 S
->getI64Imm(63, dl
)), 0);
3749 SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
,
3750 LHS
, S
->getI64Imm(63, dl
)), 0);
3752 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
3753 RHS
, S
->getI64Imm(1, dl
),
3754 S
->getI64Imm(63, dl
)), 0);
3755 SDValue SUBFC8Carry
=
3756 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3759 SDValue(CurDAG
->getMachineNode(PPC::ADDE8
, dl
, MVT::i64
,
3760 SRDINode
, SRADINode
, SUBFC8Carry
), 0);
3762 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3763 ADDE8Node
, S
->getI64Imm(1, dl
)), 0);
3764 return SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
,
3768 // {subc.reg, subc.CA} = (subcarry %a, %b)
3769 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3770 std::swap(LHS
, RHS
);
3773 // {subc.reg, subc.CA} = (subcarry %b, %a)
3774 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3775 SDValue SubtractCarry
=
3776 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3779 SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, MVT::Glue
, LHS
,
3780 LHS
, SubtractCarry
), 0);
3781 return SDValue(CurDAG
->getMachineNode(PPC::NOR8
, dl
, MVT::i64
,
3782 ExtSub
, ExtSub
), 0);
3785 // {subc.reg, subc.CA} = (subcarry %b, %a)
3786 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3787 std::swap(LHS
, RHS
);
3790 // {subc.reg, subc.CA} = (subcarry %a, %b)
3791 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3793 SDValue(CurDAG
->getMachineNode(PPC::SUBFC8
, dl
, MVT::i64
, MVT::Glue
,
3795 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
,
3796 LHS
, LHS
, SubCarry
), 0);
3801 /// Do all uses of this SDValue need the result in a GPR?
3802 /// This is meant to be used on values that have type i1 since
3803 /// it is somewhat meaningless to ask if values of other types
3804 /// should be kept in GPR's.
3805 static bool allUsesExtend(SDValue Compare
, SelectionDAG
*CurDAG
) {
3806 assert(Compare
.getOpcode() == ISD::SETCC
&&
3807 "An ISD::SETCC node required here.");
3809 // For values that have a single use, the caller should obviously already have
3810 // checked if that use is an extending use. We check the other uses here.
3811 if (Compare
.hasOneUse())
3813 // We want the value in a GPR if it is being extended, used for a select, or
3814 // used in logical operations.
3815 for (auto CompareUse
: Compare
.getNode()->uses())
3816 if (CompareUse
->getOpcode() != ISD::SIGN_EXTEND
&&
3817 CompareUse
->getOpcode() != ISD::ZERO_EXTEND
&&
3818 CompareUse
->getOpcode() != ISD::SELECT
&&
3819 !isLogicOp(CompareUse
->getOpcode())) {
3820 OmittedForNonExtendUses
++;
3826 /// Returns an equivalent of a SETCC node but with the result the same width as
3827 /// the inputs. This can also be used for SELECT_CC if one of the true/false
3828 /// values is a power of two while the other is zero.
3829 SDValue
IntegerCompareEliminator::getSETCCInGPR(SDValue Compare
,
3830 SetccInGPROpts ConvOpts
) {
3831 assert((Compare
.getOpcode() == ISD::SETCC
||
3832 Compare
.getOpcode() == ISD::SELECT_CC
) &&
3833 "An ISD::SETCC node required here.");
3835 // Don't convert this comparison to a GPR sequence because there are uses
3836 // of the i1 result (i.e. uses that require the result in the CR).
3837 if ((Compare
.getOpcode() == ISD::SETCC
) && !allUsesExtend(Compare
, CurDAG
))
3840 SDValue LHS
= Compare
.getOperand(0);
3841 SDValue RHS
= Compare
.getOperand(1);
3843 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3844 int CCOpNum
= Compare
.getOpcode() == ISD::SELECT_CC
? 4 : 2;
3846 cast
<CondCodeSDNode
>(Compare
.getOperand(CCOpNum
))->get();
3847 EVT InputVT
= LHS
.getValueType();
3848 if (InputVT
!= MVT::i32
&& InputVT
!= MVT::i64
)
3851 if (ConvOpts
== SetccInGPROpts::ZExtInvert
||
3852 ConvOpts
== SetccInGPROpts::SExtInvert
)
3853 CC
= ISD::getSetCCInverse(CC
, InputVT
);
3855 bool Inputs32Bit
= InputVT
== MVT::i32
;
3858 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3859 int64_t RHSValue
= RHSConst
? RHSConst
->getSExtValue() : INT64_MAX
;
3860 bool IsSext
= ConvOpts
== SetccInGPROpts::SExtOrig
||
3861 ConvOpts
== SetccInGPROpts::SExtInvert
;
3863 if (IsSext
&& Inputs32Bit
)
3864 return get32BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3865 else if (Inputs32Bit
)
3866 return get32BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3868 return get64BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3869 return get64BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3872 } // end anonymous namespace
/// Try to select \p N through the IntegerCompareEliminator so that the
/// comparison feeding it is computed in a GPR. Only nodes producing i32 or i64
/// are considered, the target must be 64-bit with optimization enabled, and on
/// ISA 3.1 subtargets the transformation is only attempted when the CmpInGPR
/// option was given explicitly. If the eliminator produces a node, \p N is
/// replaced with it.
3874 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode
*N
) {
// Only i32 and i64 results are candidates.
3875 if (N
->getValueType(0) != MVT::i32
&&
3876 N
->getValueType(0) != MVT::i64
)
3879 // This optimization will emit code that assumes 64-bit registers
3880 // so we don't want to run it in 32-bit mode. Also don't run it
3881 // on functions that are not to be optimized.
3882 if (TM
.getOptLevel() == CodeGenOpt::None
|| !TM
.isPPC64())
3885 // For POWER10, it is more profitable to use the set boolean extension
3886 // instructions rather than the integer compare elimination codegen.
3887 // Users can override this via the command line option, `--ppc-gpr-icmps`.
3888 if (!(CmpInGPR
.getNumOccurrences() > 0) && Subtarget
->isISA3_1())
// Dispatch on the opcode of N; zero- and sign-extensions are among the
// opcodes handed to the eliminator.
3891 switch (N
->getOpcode()) {
3893 case ISD::ZERO_EXTEND
:
3894 case ISD::SIGN_EXTEND
:
// Let the eliminator build the GPR sequence; a non-null result replaces N.
3898 IntegerCompareEliminator
ICmpElim(CurDAG
, this);
3899 if (SDNode
*New
= ICmpElim
.Select(N
)) {
3900 ReplaceNode(N
, New
);
/// Try to select \p N through the BitPermutationSelector. Only nodes producing
/// i32 or i64 are considered, and the UseBitPermRewriter flag is checked up
/// front. If the selector produces a node, \p N is replaced with it.
3908 bool PPCDAGToDAGISel::tryBitPermutation(SDNode
*N
) {
// Only i32 and i64 results are candidates.
3909 if (N
->getValueType(0) != MVT::i32
&&
3910 N
->getValueType(0) != MVT::i64
)
// The rewriter is controlled by the UseBitPermRewriter flag.
3913 if (!UseBitPermRewriter
)
3916 switch (N
->getOpcode()) {
// Let the selector build the replacement; a non-null result replaces N.
3923 BitPermutationSelector
BPS(CurDAG
);
3924 if (SDNode
*New
= BPS
.Select(N
)) {
3925 ReplaceNode(N
, New
);
3935 /// SelectCC - Select a comparison of the specified values with the specified
3936 /// condition code, returning the CR# of the expression.
3937 SDValue
PPCDAGToDAGISel::SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
3938 const SDLoc
&dl
, SDValue Chain
) {
3939 // Always select the LHS.
3942 if (LHS
.getValueType() == MVT::i32
) {
3944 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
3945 if (isInt32Immediate(RHS
, Imm
)) {
3946 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3947 if (isUInt
<16>(Imm
))
3948 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
3949 getI32Imm(Imm
& 0xFFFF, dl
)),
3951 // If this is a 16-bit signed immediate, fold it.
3952 if (isInt
<16>((int)Imm
))
3953 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
3954 getI32Imm(Imm
& 0xFFFF, dl
)),
3957 // For non-equality comparisons, the default code would materialize the
3958 // constant, then compare against it, like this:
3960 // ori r2, r2, 22136
3962 // Since we are just comparing for equality, we can emit this instead:
3963 // xoris r0,r3,0x1234
3964 // cmplwi cr0,r0,0x5678
3966 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS
, dl
, MVT::i32
, LHS
,
3967 getI32Imm(Imm
>> 16, dl
)), 0);
3968 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, Xor
,
3969 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
3972 } else if (ISD::isUnsignedIntSetCC(CC
)) {
3973 if (isInt32Immediate(RHS
, Imm
) && isUInt
<16>(Imm
))
3974 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
3975 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
3979 if (isIntS16Immediate(RHS
, SImm
))
3980 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
3981 getI32Imm((int)SImm
& 0xFFFF,
3986 } else if (LHS
.getValueType() == MVT::i64
) {
3988 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
3989 if (isInt64Immediate(RHS
.getNode(), Imm
)) {
3990 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3991 if (isUInt
<16>(Imm
))
3992 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
3993 getI32Imm(Imm
& 0xFFFF, dl
)),
3995 // If this is a 16-bit signed immediate, fold it.
3997 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
3998 getI32Imm(Imm
& 0xFFFF, dl
)),
4001 // For non-equality comparisons, the default code would materialize the
4002 // constant, then compare against it, like this:
4004 // ori r2, r2, 22136
4006 // Since we are just comparing for equality, we can emit this instead:
4007 // xoris r0,r3,0x1234
4008 // cmpldi cr0,r0,0x5678
4010 if (isUInt
<32>(Imm
)) {
4011 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS8
, dl
, MVT::i64
, LHS
,
4012 getI64Imm(Imm
>> 16, dl
)), 0);
4013 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, Xor
,
4014 getI64Imm(Imm
& 0xFFFF, dl
)),
4019 } else if (ISD::isUnsignedIntSetCC(CC
)) {
4020 if (isInt64Immediate(RHS
.getNode(), Imm
) && isUInt
<16>(Imm
))
4021 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
4022 getI64Imm(Imm
& 0xFFFF, dl
)), 0);
4026 if (isIntS16Immediate(RHS
, SImm
))
4027 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
4028 getI64Imm(SImm
& 0xFFFF, dl
)),
4032 } else if (LHS
.getValueType() == MVT::f32
) {
4033 if (Subtarget
->hasSPE()) {
4038 Opc
= PPC::EFSCMPEQ
;
4046 Opc
= PPC::EFSCMPLT
;
4054 Opc
= PPC::EFSCMPGT
;
4059 } else if (LHS
.getValueType() == MVT::f64
) {
4060 if (Subtarget
->hasSPE()) {
4065 Opc
= PPC::EFDCMPEQ
;
4073 Opc
= PPC::EFDCMPLT
;
4081 Opc
= PPC::EFDCMPGT
;
4085 Opc
= Subtarget
->hasVSX() ? PPC::XSCMPUDP
: PPC::FCMPUD
;
4087 assert(LHS
.getValueType() == MVT::f128
&& "Unknown vt!");
4088 assert(Subtarget
->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4089 Opc
= PPC::XSCMPUQP
;
4093 CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, MVT::Other
, LHS
, RHS
, Chain
),
4096 return SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, LHS
, RHS
), 0);
4099 static PPC::Predicate
getPredicateForSetCC(ISD::CondCode CC
, const EVT
&VT
,
4100 const PPCSubtarget
*Subtarget
) {
4101 // For SPE instructions, the result is in the GT bit of the CR.
4102 bool UseSPE
= Subtarget
->hasSPE() && VT
.isFloatingPoint();
4109 llvm_unreachable("Should be lowered by legalize!");
4110 default: llvm_unreachable("Unknown condition!");
4113 return UseSPE
? PPC::PRED_GT
: PPC::PRED_EQ
;
4116 return UseSPE
? PPC::PRED_LE
: PPC::PRED_NE
;
4119 return UseSPE
? PPC::PRED_GT
: PPC::PRED_LT
;
4122 return PPC::PRED_LE
;
4125 return PPC::PRED_GT
;
4128 return UseSPE
? PPC::PRED_LE
: PPC::PRED_GE
;
4129 case ISD::SETO
: return PPC::PRED_NU
;
4130 case ISD::SETUO
: return PPC::PRED_UN
;
4131 // These two are invalid for floating point. Assume we have int.
4132 case ISD::SETULT
: return PPC::PRED_LT
;
4133 case ISD::SETUGT
: return PPC::PRED_GT
;
4137 /// getCRIdxForSetCC - Return the index of the condition register field
4138 /// associated with the SetCC condition, and whether or not the field is
4139 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
4140 static unsigned getCRIdxForSetCC(ISD::CondCode CC
, bool &Invert
) {
4143 default: llvm_unreachable("Unknown condition!");
4145 case ISD::SETLT
: return 0; // Bit #0 = SETOLT
4147 case ISD::SETGT
: return 1; // Bit #1 = SETOGT
4149 case ISD::SETEQ
: return 2; // Bit #2 = SETOEQ
4150 case ISD::SETUO
: return 3; // Bit #3 = SETUO
4152 case ISD::SETGE
: Invert
= true; return 0; // !Bit #0 = SETUGE
4154 case ISD::SETLE
: Invert
= true; return 1; // !Bit #1 = SETULE
4156 case ISD::SETNE
: Invert
= true; return 2; // !Bit #2 = SETUNE
4157 case ISD::SETO
: Invert
= true; return 3; // !Bit #3 = SETO
4162 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4163 // These are invalid for floating point. Assume integer.
4164 case ISD::SETULT
: return 0;
4165 case ISD::SETUGT
: return 1;
4169 // getVCmpInst: return the vector compare instruction for the specified
4170 // vector type and condition code. Since this is for altivec specific code,
4171 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4173 static unsigned int getVCmpInst(MVT VecVT
, ISD::CondCode CC
,
4174 bool HasVSX
, bool &Swap
, bool &Negate
) {
4178 if (VecVT
.isFloatingPoint()) {
4179 /* Handle some cases by swapping input operands. */
4181 case ISD::SETLE
: CC
= ISD::SETGE
; Swap
= true; break;
4182 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
4183 case ISD::SETOLE
: CC
= ISD::SETOGE
; Swap
= true; break;
4184 case ISD::SETOLT
: CC
= ISD::SETOGT
; Swap
= true; break;
4185 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
4186 case ISD::SETUGT
: CC
= ISD::SETULT
; Swap
= true; break;
4189 /* Handle some cases by negating the result. */
4191 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
4192 case ISD::SETUNE
: CC
= ISD::SETOEQ
; Negate
= true; break;
4193 case ISD::SETULE
: CC
= ISD::SETOGT
; Negate
= true; break;
4194 case ISD::SETULT
: CC
= ISD::SETOGE
; Negate
= true; break;
4197 /* We have instructions implementing the remaining cases. */
4201 if (VecVT
== MVT::v4f32
)
4202 return HasVSX
? PPC::XVCMPEQSP
: PPC::VCMPEQFP
;
4203 else if (VecVT
== MVT::v2f64
)
4204 return PPC::XVCMPEQDP
;
4208 if (VecVT
== MVT::v4f32
)
4209 return HasVSX
? PPC::XVCMPGTSP
: PPC::VCMPGTFP
;
4210 else if (VecVT
== MVT::v2f64
)
4211 return PPC::XVCMPGTDP
;
4215 if (VecVT
== MVT::v4f32
)
4216 return HasVSX
? PPC::XVCMPGESP
: PPC::VCMPGEFP
;
4217 else if (VecVT
== MVT::v2f64
)
4218 return PPC::XVCMPGEDP
;
4223 llvm_unreachable("Invalid floating-point vector compare condition");
4225 /* Handle some cases by swapping input operands. */
4227 case ISD::SETGE
: CC
= ISD::SETLE
; Swap
= true; break;
4228 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
4229 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
4230 case ISD::SETULT
: CC
= ISD::SETUGT
; Swap
= true; break;
4233 /* Handle some cases by negating the result. */
4235 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
4236 case ISD::SETUNE
: CC
= ISD::SETUEQ
; Negate
= true; break;
4237 case ISD::SETLE
: CC
= ISD::SETGT
; Negate
= true; break;
4238 case ISD::SETULE
: CC
= ISD::SETUGT
; Negate
= true; break;
4241 /* We have instructions implementing the remaining cases. */
4245 if (VecVT
== MVT::v16i8
)
4246 return PPC::VCMPEQUB
;
4247 else if (VecVT
== MVT::v8i16
)
4248 return PPC::VCMPEQUH
;
4249 else if (VecVT
== MVT::v4i32
)
4250 return PPC::VCMPEQUW
;
4251 else if (VecVT
== MVT::v2i64
)
4252 return PPC::VCMPEQUD
;
4253 else if (VecVT
== MVT::v1i128
)
4254 return PPC::VCMPEQUQ
;
4257 if (VecVT
== MVT::v16i8
)
4258 return PPC::VCMPGTSB
;
4259 else if (VecVT
== MVT::v8i16
)
4260 return PPC::VCMPGTSH
;
4261 else if (VecVT
== MVT::v4i32
)
4262 return PPC::VCMPGTSW
;
4263 else if (VecVT
== MVT::v2i64
)
4264 return PPC::VCMPGTSD
;
4265 else if (VecVT
== MVT::v1i128
)
4266 return PPC::VCMPGTSQ
;
4269 if (VecVT
== MVT::v16i8
)
4270 return PPC::VCMPGTUB
;
4271 else if (VecVT
== MVT::v8i16
)
4272 return PPC::VCMPGTUH
;
4273 else if (VecVT
== MVT::v4i32
)
4274 return PPC::VCMPGTUW
;
4275 else if (VecVT
== MVT::v2i64
)
4276 return PPC::VCMPGTUD
;
4277 else if (VecVT
== MVT::v1i128
)
4278 return PPC::VCMPGTUQ
;
4283 llvm_unreachable("Invalid integer vector compare condition");
4287 bool PPCDAGToDAGISel::trySETCC(SDNode
*N
) {
4290 bool IsStrict
= N
->isStrictFPOpcode();
4292 cast
<CondCodeSDNode
>(N
->getOperand(IsStrict
? 3 : 2))->get();
4294 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
4295 bool isPPC64
= (PtrVT
== MVT::i64
);
4296 SDValue Chain
= IsStrict
? N
->getOperand(0) : SDValue();
4298 SDValue LHS
= N
->getOperand(IsStrict
? 1 : 0);
4299 SDValue RHS
= N
->getOperand(IsStrict
? 2 : 1);
4301 if (!IsStrict
&& !Subtarget
->useCRBits() && isInt32Immediate(RHS
, Imm
)) {
4302 // We can codegen setcc op, imm very efficiently compared to a brcond.
4303 // Check for those cases here.
4310 Op
= SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Op
), 0);
4311 SDValue Ops
[] = { Op
, getI32Imm(27, dl
), getI32Imm(5, dl
),
4312 getI32Imm(31, dl
) };
4313 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4319 SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4320 Op
, getI32Imm(~0U, dl
)), 0);
4321 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, AD
, Op
, AD
.getValue(1));
4325 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4326 getI32Imm(31, dl
) };
4327 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4332 SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Op
), 0);
4333 T
= SDValue(CurDAG
->getMachineNode(PPC::ANDC
, dl
, MVT::i32
, T
, Op
), 0);
4334 SDValue Ops
[] = { T
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4335 getI32Imm(31, dl
) };
4336 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4340 } else if (Imm
== ~0U) { // setcc op, -1
4346 Op
= SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4347 Op
, getI32Imm(1, dl
)), 0);
4348 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
,
4349 SDValue(CurDAG
->getMachineNode(PPC::LI
, dl
,
4352 0), Op
.getValue(1));
4356 Op
= SDValue(CurDAG
->getMachineNode(PPC::NOR
, dl
, MVT::i32
, Op
, Op
), 0);
4357 SDNode
*AD
= CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
4358 Op
, getI32Imm(~0U, dl
));
4359 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(AD
, 0), Op
,
4364 SDValue AD
= SDValue(CurDAG
->getMachineNode(PPC::ADDI
, dl
, MVT::i32
, Op
,
4365 getI32Imm(1, dl
)), 0);
4366 SDValue AN
= SDValue(CurDAG
->getMachineNode(PPC::AND
, dl
, MVT::i32
, AD
,
4368 SDValue Ops
[] = { AN
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4369 getI32Imm(31, dl
) };
4370 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4374 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
4375 getI32Imm(31, dl
) };
4376 Op
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
4377 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Op
, getI32Imm(1, dl
));
4384 // Altivec Vector compare instructions do not set any CR register by default and
4385 // vector compare operations return the same type as the operands.
4386 if (!IsStrict
&& LHS
.getValueType().isVector()) {
4387 if (Subtarget
->hasSPE())
4390 EVT VecVT
= LHS
.getValueType();
4392 unsigned int VCmpInst
=
4393 getVCmpInst(VecVT
.getSimpleVT(), CC
, Subtarget
->hasVSX(), Swap
, Negate
);
4395 std::swap(LHS
, RHS
);
4397 EVT ResVT
= VecVT
.changeVectorElementTypeToInteger();
4399 SDValue
VCmp(CurDAG
->getMachineNode(VCmpInst
, dl
, ResVT
, LHS
, RHS
), 0);
4400 CurDAG
->SelectNodeTo(N
, Subtarget
->hasVSX() ? PPC::XXLNOR
: PPC::VNOR
,
4405 CurDAG
->SelectNodeTo(N
, VCmpInst
, ResVT
, LHS
, RHS
);
4409 if (Subtarget
->useCRBits())
4413 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
4414 SDValue CCReg
= SelectCC(LHS
, RHS
, CC
, dl
, Chain
);
4416 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 1), CCReg
.getValue(1));
4419 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that.
4420 // The correct compare instruction is already set by SelectCC().
4421 if (Subtarget
->hasSPE() && LHS
.getValueType().isFloatingPoint()) {
4425 // Force the ccreg into CR7.
4426 SDValue CR7Reg
= CurDAG
->getRegister(PPC::CR7
, MVT::i32
);
4428 SDValue
InFlag(nullptr, 0); // Null incoming flag value.
4429 CCReg
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, CR7Reg
, CCReg
,
4430 InFlag
).getValue(1);
4432 IntCR
= SDValue(CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
, CR7Reg
,
4435 SDValue Ops
[] = { IntCR
, getI32Imm((32 - (3 - Idx
)) & 31, dl
),
4436 getI32Imm(31, dl
), getI32Imm(31, dl
) };
4438 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4442 // Get the specified bit.
4444 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
4445 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Tmp
, getI32Imm(1, dl
));
4449 /// Does this node represent a load/store node whose address can be represented
4450 /// with a register plus an immediate that's a multiple of \p Val?
4451 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const {
4452 LoadSDNode
*LDN
= dyn_cast
<LoadSDNode
>(N
);
4453 StoreSDNode
*STN
= dyn_cast
<StoreSDNode
>(N
);
4456 AddrOp
= LDN
->getOperand(1);
4458 AddrOp
= STN
->getOperand(2);
4460 // If the address points to a frame object or a frame object with an offset,
4461 // we need to check the object alignment.
4463 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(
4464 AddrOp
.getOpcode() == ISD::ADD
? AddrOp
.getOperand(0) :
4466 // If op0 is a frame index that is under-aligned, we can't do it either,
4467 // because it is translated to r31 or r1 + slot + offset. We won't know the
4468 // slot number until the stack frame is finalized.
4469 const MachineFrameInfo
&MFI
= CurDAG
->getMachineFunction().getFrameInfo();
4470 unsigned SlotAlign
= MFI
.getObjectAlign(FI
->getIndex()).value();
4471 if ((SlotAlign
% Val
) != 0)
4474 // If we have an offset, we need to check the offset further.
4475 if (AddrOp
.getOpcode() != ISD::ADD
)
4479 if (AddrOp
.getOpcode() == ISD::ADD
)
4480 return isIntS16Immediate(AddrOp
.getOperand(1), Imm
) && !(Imm
% Val
);
4482 // If the address comes from the outside, the offset will be zero.
4483 return AddrOp
.getOpcode() == ISD::CopyFromReg
;
/// Copy the memory operand of \p N onto \p Result. \p N is accessed through
/// cast<MemSDNode> and \p Result through cast<MachineSDNode>, so both nodes
/// are expected to be of those kinds.
4486 void PPCDAGToDAGISel::transferMemOperands(SDNode
*N
, SDNode
*Result
) {
4487 // Transfer memoperands.
4488 MachineMemOperand
*MemOp
= cast
<MemSDNode
>(N
)->getMemOperand();
// Install MemOp as the only memory reference recorded on Result.
4489 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
4492 static bool mayUseP9Setb(SDNode
*N
, const ISD::CondCode
&CC
, SelectionDAG
*DAG
,
4493 bool &NeedSwapOps
, bool &IsUnCmp
) {
4495 assert(N
->getOpcode() == ISD::SELECT_CC
&& "Expecting a SELECT_CC here.");
4497 SDValue LHS
= N
->getOperand(0);
4498 SDValue RHS
= N
->getOperand(1);
4499 SDValue TrueRes
= N
->getOperand(2);
4500 SDValue FalseRes
= N
->getOperand(3);
4501 ConstantSDNode
*TrueConst
= dyn_cast
<ConstantSDNode
>(TrueRes
);
4502 if (!TrueConst
|| (N
->getSimpleValueType(0) != MVT::i64
&&
4503 N
->getSimpleValueType(0) != MVT::i32
))
4506 // We are looking for any of:
4507 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4508 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4509 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4510 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4511 int64_t TrueResVal
= TrueConst
->getSExtValue();
4512 if ((TrueResVal
< -1 || TrueResVal
> 1) ||
4513 (TrueResVal
== -1 && FalseRes
.getOpcode() != ISD::ZERO_EXTEND
) ||
4514 (TrueResVal
== 1 && FalseRes
.getOpcode() != ISD::SIGN_EXTEND
) ||
4516 (FalseRes
.getOpcode() != ISD::SELECT_CC
|| CC
!= ISD::SETEQ
)))
4519 SDValue SetOrSelCC
= FalseRes
.getOpcode() == ISD::SELECT_CC
4521 : FalseRes
.getOperand(0);
4522 bool InnerIsSel
= SetOrSelCC
.getOpcode() == ISD::SELECT_CC
;
4523 if (SetOrSelCC
.getOpcode() != ISD::SETCC
&&
4524 SetOrSelCC
.getOpcode() != ISD::SELECT_CC
)
4527 // Without this setb optimization, the outer SELECT_CC is manually selected
4528 // to a SELECT_CC_I4/SELECT_CC_I8 pseudo, which the expand-isel-pseudos pass
4529 // then turns into an isel instruction. When the inner result (e.g. the
4530 // zext/sext) has more than one use, this optimization merely replaces the
4531 // isel with a setb and yields no significant gain. Since setb has a longer
4532 // latency than the original isel, we avoid it in that case. Additionally,
4533 // setb requires the comparison to be kept, which could block a future
4534 // opportunity to eliminate the comparison.
4535 if (!SetOrSelCC
.hasOneUse() || (!InnerIsSel
&& !FalseRes
.hasOneUse()))
4538 SDValue InnerLHS
= SetOrSelCC
.getOperand(0);
4539 SDValue InnerRHS
= SetOrSelCC
.getOperand(1);
4540 ISD::CondCode InnerCC
=
4541 cast
<CondCodeSDNode
>(SetOrSelCC
.getOperand(InnerIsSel
? 4 : 2))->get();
4542 // If the inner comparison is a select_cc, make sure the true/false values are
4543 // 1/-1 and canonicalize it if needed.
4545 ConstantSDNode
*SelCCTrueConst
=
4546 dyn_cast
<ConstantSDNode
>(SetOrSelCC
.getOperand(2));
4547 ConstantSDNode
*SelCCFalseConst
=
4548 dyn_cast
<ConstantSDNode
>(SetOrSelCC
.getOperand(3));
4549 if (!SelCCTrueConst
|| !SelCCFalseConst
)
4551 int64_t SelCCTVal
= SelCCTrueConst
->getSExtValue();
4552 int64_t SelCCFVal
= SelCCFalseConst
->getSExtValue();
4553 // The values must be -1/1 (requiring a swap) or 1/-1.
4554 if (SelCCTVal
== -1 && SelCCFVal
== 1) {
4555 std::swap(InnerLHS
, InnerRHS
);
4556 } else if (SelCCTVal
!= 1 || SelCCFVal
!= -1)
4560 // Canonicalize unsigned case
4561 if (InnerCC
== ISD::SETULT
|| InnerCC
== ISD::SETUGT
) {
4563 InnerCC
= (InnerCC
== ISD::SETULT
) ? ISD::SETLT
: ISD::SETGT
;
4566 bool InnerSwapped
= false;
4567 if (LHS
== InnerRHS
&& RHS
== InnerLHS
)
4568 InnerSwapped
= true;
4569 else if (LHS
!= InnerLHS
|| RHS
!= InnerRHS
)
4573 // (select_cc lhs, rhs, 0, \
4574 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4578 if (InnerCC
!= ISD::SETLT
&& InnerCC
!= ISD::SETGT
)
4580 NeedSwapOps
= (InnerCC
== ISD::SETGT
) ? InnerSwapped
: !InnerSwapped
;
4583 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4584 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4585 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4586 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4587 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4588 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4590 if (!IsUnCmp
&& InnerCC
!= ISD::SETNE
)
4595 if (InnerCC
== ISD::SETNE
|| (InnerCC
== ISD::SETGT
&& !InnerSwapped
) ||
4596 (InnerCC
== ISD::SETLT
&& InnerSwapped
))
4597 NeedSwapOps
= (TrueResVal
== 1);
4602 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4603 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4604 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4605 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4606 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4607 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4609 if (!IsUnCmp
&& InnerCC
!= ISD::SETNE
)
4614 if (InnerCC
== ISD::SETNE
|| (InnerCC
== ISD::SETLT
&& !InnerSwapped
) ||
4615 (InnerCC
== ISD::SETGT
&& InnerSwapped
))
4616 NeedSwapOps
= (TrueResVal
== -1);
4625 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4626 LLVM_DEBUG(N
->dump());
4631 // Return true if it's a software square-root/divide operand.
4632 static bool isSWTestOp(SDValue N
) {
4633 if (N
.getOpcode() == PPCISD::FTSQRT
)
4635 if (N
.getNumOperands() < 1 || !isa
<ConstantSDNode
>(N
.getOperand(0)))
4637 switch (N
.getConstantOperandVal(0)) {
4638 case Intrinsic::ppc_vsx_xvtdivdp
:
4639 case Intrinsic::ppc_vsx_xvtdivsp
:
4640 case Intrinsic::ppc_vsx_xvtsqrtdp
:
4641 case Intrinsic::ppc_vsx_xvtsqrtsp
:
4647 bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode
*N
) {
4648 assert(N
->getOpcode() == ISD::BR_CC
&& "ISD::BR_CC is expected.");
4649 // We are looking for following patterns, where `truncate to i1` actually has
4650 // the same semantic with `and 1`.
4651 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4652 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4653 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4654 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4655 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4656 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4657 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4658 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4659 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(1))->get();
4660 if (CC
!= ISD::SETEQ
&& CC
!= ISD::SETNE
)
4663 SDValue CmpRHS
= N
->getOperand(3);
4664 if (!isa
<ConstantSDNode
>(CmpRHS
) ||
4665 cast
<ConstantSDNode
>(CmpRHS
)->getSExtValue() != 0)
4668 SDValue CmpLHS
= N
->getOperand(2);
4669 if (CmpLHS
.getNumOperands() < 1 || !isSWTestOp(CmpLHS
.getOperand(0)))
4673 bool IsCCNE
= CC
== ISD::SETNE
;
4674 if (CmpLHS
.getOpcode() == ISD::AND
&&
4675 isa
<ConstantSDNode
>(CmpLHS
.getOperand(1)))
4676 switch (CmpLHS
.getConstantOperandVal(1)) {
4678 PCC
= IsCCNE
? PPC::PRED_UN
: PPC::PRED_NU
;
4681 PCC
= IsCCNE
? PPC::PRED_EQ
: PPC::PRED_NE
;
4684 PCC
= IsCCNE
? PPC::PRED_GT
: PPC::PRED_LE
;
4687 PCC
= IsCCNE
? PPC::PRED_LT
: PPC::PRED_GE
;
4692 else if (CmpLHS
.getOpcode() == ISD::TRUNCATE
&&
4693 CmpLHS
.getValueType() == MVT::i1
)
4694 PCC
= IsCCNE
? PPC::PRED_UN
: PPC::PRED_NU
;
4698 SDValue Ops
[] = {getI32Imm(PCC
, dl
), CmpLHS
.getOperand(0), N
->getOperand(4),
4700 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
4706 bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode
*N
) {
4707 assert(N
->getOpcode() == ISD::AND
&& "ISD::AND SDNode expected");
4709 if (!isInt32Immediate(N
->getOperand(1), Imm
))
4713 SDValue Val
= N
->getOperand(0);
4714 unsigned SH
, MB
, ME
;
4715 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4716 // with a mask, emit rlwinm
4717 if (isRotateAndMask(Val
.getNode(), Imm
, false, SH
, MB
, ME
)) {
4718 Val
= Val
.getOperand(0);
4719 SDValue Ops
[] = {Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
4721 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4725 // If this is just a masked value where the input is not handled, and
4726 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4727 if (isRunOfOnes(Imm
, MB
, ME
) && Val
.getOpcode() != ISD::ROTL
) {
4728 SDValue Ops
[] = {Val
, getI32Imm(0, dl
), getI32Imm(MB
, dl
),
4730 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
4734 // AND X, 0 -> 0, not "rlwinm 32".
4736 ReplaceUses(SDValue(N
, 0), N
->getOperand(1));
4743 bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode
*N
) {
4744 assert(N
->getOpcode() == ISD::AND
&& "ISD::AND SDNode expected");
4746 if (!isInt64Immediate(N
->getOperand(1).getNode(), Imm64
))
4750 if (isRunOfOnes64(Imm64
, MB
, ME
) && MB
>= 32 && MB
<= ME
) {
4752 // +----------------------+
4753 // |xxxxxxxxxxx00011111000|
4754 // +----------------------+
4756 // We can only do it if the MB is larger than 32 and MB <= ME
4757 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
4758 // we didn't rotate it.
4760 SDValue Ops
[] = {N
->getOperand(0), getI64Imm(0, dl
), getI64Imm(MB
- 32, dl
),
4761 getI64Imm(ME
- 32, dl
)};
4762 CurDAG
->SelectNodeTo(N
, PPC::RLWINM8
, MVT::i64
, Ops
);
4769 bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode
*N
) {
4770 assert(N
->getOpcode() == ISD::AND
&& "ISD::AND SDNode expected");
4772 if (!isInt64Immediate(N
->getOperand(1).getNode(), Imm64
))
4775 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
4776 // it well with "andi.".
4777 if (isUInt
<16>(Imm64
))
4781 SDValue Val
= N
->getOperand(0);
4783 // Optimized with two rldicl's as follows:
4784 // Add missing bits on left to the mask and check that the mask is a
4785 // wrapped run of ones, i.e.
4786 // Change pattern |0001111100000011111111|
4787 // to |1111111100000011111111|.
4788 unsigned NumOfLeadingZeros
= countLeadingZeros(Imm64
);
4789 if (NumOfLeadingZeros
!= 0)
4790 Imm64
|= maskLeadingOnes
<uint64_t>(NumOfLeadingZeros
);
4793 if (!isRunOfOnes64(Imm64
, MB
, ME
))
4797 // +----------------------+ +----------------------+
4798 // |1111111100000011111111| -> |0000001111111111111111|
4799 // +----------------------+ +----------------------+
4801 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
4802 unsigned OnesOnLeft
= ME
+ 1;
4803 unsigned ZerosInBetween
= (MB
- ME
+ 63) & 63;
4804 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
4805 // on the left the bits that are already zeros in the mask.
4806 Val
= SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, Loc
, MVT::i64
, Val
,
4807 getI64Imm(OnesOnLeft
, Loc
),
4808 getI64Imm(ZerosInBetween
, Loc
)),
4811 // +----------------------+ +----------------------+
4812 // |0000001111111111111111| -> |0001111100000011111111|
4813 // +----------------------+ +----------------------+
4815 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
4816 // left the number of ones we previously added.
4817 SDValue Ops
[] = {Val
, getI64Imm(64 - OnesOnLeft
, Loc
),
4818 getI64Imm(NumOfLeadingZeros
, Loc
)};
4819 CurDAG
->SelectNodeTo(N
, PPC::RLDICL
, MVT::i64
, Ops
);
4823 bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode
*N
) {
4824 assert(N
->getOpcode() == ISD::AND
&& "ISD::AND SDNode expected");
4826 if (!isInt32Immediate(N
->getOperand(1), Imm
))
4829 SDValue Val
= N
->getOperand(0);
4831 // ISD::OR doesn't get all the bitfield insertion fun.
4832 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4834 if (Val
.getOpcode() != ISD::OR
|| !isInt32Immediate(Val
.getOperand(1), Imm2
))
4837 // The idea here is to check whether this is equivalent to:
4838 // (c1 & m) | (x & ~m)
4839 // where m is a run-of-ones mask. The logic here is that, for each bit in
4841 // - if both are 1, then the output will be 1.
4842 // - if both are 0, then the output will be 0.
4843 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4845 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4847 // If that last condition is never the case, then we can form m from the
4848 // bits that are the same between c1 and c2.
4850 if (isRunOfOnes(~(Imm
^ Imm2
), MB
, ME
) && !(~Imm
& Imm2
)) {
4852 SDValue Ops
[] = {Val
.getOperand(0), Val
.getOperand(1), getI32Imm(0, dl
),
4853 getI32Imm(MB
, dl
), getI32Imm(ME
, dl
)};
4854 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
4861 bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode
*N
) {
4862 assert(N
->getOpcode() == ISD::AND
&& "ISD::AND SDNode expected");
4864 if (!isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) || !isMask_64(Imm64
))
4867 // If this is a 64-bit zero-extension mask, emit rldicl.
4868 unsigned MB
= 64 - countTrailingOnes(Imm64
);
4871 SDValue Val
= N
->getOperand(0);
4874 if (Val
.getOpcode() == ISD::ANY_EXTEND
) {
4875 auto Op0
= Val
.getOperand(0);
4876 if (Op0
.getOpcode() == ISD::SRL
&&
4877 isInt32Immediate(Op0
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
4879 auto ResultType
= Val
.getNode()->getValueType(0);
4880 auto ImDef
= CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
, ResultType
);
4881 SDValue
IDVal(ImDef
, 0);
4883 Val
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
, ResultType
,
4884 IDVal
, Op0
.getOperand(0),
4891 // If the operand is a logical right shift, we can fold it into this
4892 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4893 // for n <= mb. The right shift is really a left rotate followed by a
4894 // mask, and this mask is a more-restrictive sub-mask of the mask implied
4896 if (Val
.getOpcode() == ISD::SRL
&&
4897 isInt32Immediate(Val
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
4898 assert(Imm
< 64 && "Illegal shift amount");
4899 Val
= Val
.getOperand(0);
4903 SDValue Ops
[] = {Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
)};
4904 CurDAG
->SelectNodeTo(N
, PPC::RLDICL
, MVT::i64
, Ops
);
4908 bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode
*N
) {
4909 assert(N
->getOpcode() == ISD::AND
&& "ISD::AND SDNode expected");
4911 if (!isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) ||
4915 // If this is a negated 64-bit zero-extension mask,
4916 // i.e. the immediate is a sequence of ones from most significant side
4917 // and all zero for reminder, we should use rldicr.
4918 unsigned MB
= 63 - countTrailingOnes(~Imm64
);
4921 SDValue Ops
[] = {N
->getOperand(0), getI32Imm(SH
, dl
), getI32Imm(MB
, dl
)};
4922 CurDAG
->SelectNodeTo(N
, PPC::RLDICR
, MVT::i64
, Ops
);
4926 bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode
*N
) {
4927 assert(N
->getOpcode() == ISD::OR
&& "ISD::OR SDNode expected");
4930 SDValue N0
= N
->getOperand(0);
4932 // We won't get fewer instructions if the imm is 32-bit integer.
4933 // rldimi requires the imm to have consecutive ones with both sides zero.
4934 // Also, make sure the first Op has only one use, otherwise this may increase
4935 // register pressure since rldimi is destructive.
4936 if (!isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) ||
4937 isUInt
<32>(Imm64
) || !isRunOfOnes64(Imm64
, MB
, ME
) || !N0
.hasOneUse())
4940 unsigned SH
= 63 - ME
;
4942 // Use select64Imm for making LI instr instead of directly putting Imm64
4945 SDValue(selectI64Imm(CurDAG
, getI64Imm(-1, Dl
).getNode()), 0),
4946 getI32Imm(SH
, Dl
), getI32Imm(MB
, Dl
)};
4947 CurDAG
->SelectNodeTo(N
, PPC::RLDIMI
, MVT::i64
, Ops
);
4951 // Select - Convert the specified operand from a target-independent to a
4952 // target-specific node if it hasn't already been changed.
4953 void PPCDAGToDAGISel::Select(SDNode
*N
) {
4955 if (N
->isMachineOpcode()) {
4957 return; // Already selected.
4960 // In case any misguided DAG-level optimizations form an ADD with a
4961 // TargetConstant operand, crash here instead of miscompiling (by selecting
4962 // an r+r add instead of some kind of r+i add).
4963 if (N
->getOpcode() == ISD::ADD
&&
4964 N
->getOperand(1).getOpcode() == ISD::TargetConstant
)
4965 llvm_unreachable("Invalid ADD with TargetConstant operand");
4967 // Try matching complex bit permutations before doing anything else.
4968 if (tryBitPermutation(N
))
4971 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4972 if (tryIntCompareInGPR(N
))
4975 switch (N
->getOpcode()) {
4979 if (N
->getValueType(0) == MVT::i64
) {
4980 ReplaceNode(N
, selectI64Imm(CurDAG
, N
));
4985 case ISD::INTRINSIC_WO_CHAIN
: {
4986 // We emit the PPC::FSELS instruction here because of type conflicts with
4987 // the comparison operand. The FSELS instruction is defined to use an 8-byte
4988 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
4989 // value for the comparison. When selecting through a .td file, a type
4990 // error is raised. Must check this first so we never break on the
4991 // !Subtarget->isISA3_1() check.
4992 if (N
->getConstantOperandVal(0) == Intrinsic::ppc_fsels
) {
4993 SDValue Ops
[] = {N
->getOperand(1), N
->getOperand(2), N
->getOperand(3)};
4994 CurDAG
->SelectNodeTo(N
, PPC::FSELS
, MVT::f32
, Ops
);
4998 if (!Subtarget
->isISA3_1())
5000 unsigned Opcode
= 0;
5001 switch (N
->getConstantOperandVal(0)) {
5004 case Intrinsic::ppc_altivec_vstribr_p
:
5005 Opcode
= PPC::VSTRIBR_rec
;
5007 case Intrinsic::ppc_altivec_vstribl_p
:
5008 Opcode
= PPC::VSTRIBL_rec
;
5010 case Intrinsic::ppc_altivec_vstrihr_p
:
5011 Opcode
= PPC::VSTRIHR_rec
;
5013 case Intrinsic::ppc_altivec_vstrihl_p
:
5014 Opcode
= PPC::VSTRIHL_rec
;
5020 // Generate the appropriate vector string isolate intrinsic to match.
5021 EVT VTs
[] = {MVT::v16i8
, MVT::Glue
};
5023 SDValue(CurDAG
->getMachineNode(Opcode
, dl
, VTs
, N
->getOperand(2)), 0);
5024 // Vector string isolate instructions update the EQ bit of CR6.
5025 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5026 SDValue SubRegIdx
= CurDAG
->getTargetConstant(PPC::sub_eq
, dl
, MVT::i32
);
5027 SDValue CR6Reg
= CurDAG
->getRegister(PPC::CR6
, MVT::i32
);
5028 SDValue CRBit
= SDValue(
5029 CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
, dl
, MVT::i1
,
5030 CR6Reg
, SubRegIdx
, VecStrOp
.getValue(1)),
5032 CurDAG
->SelectNodeTo(N
, PPC::SETBC
, MVT::i32
, CRBit
);
5037 case ISD::STRICT_FSETCC
:
5038 case ISD::STRICT_FSETCCS
:
5042 // These nodes will be transformed into GETtlsADDR32 node, which
5043 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5044 case PPCISD::ADDI_TLSLD_L_ADDR
:
5045 case PPCISD::ADDI_TLSGD_L_ADDR
: {
5046 const Module
*Mod
= MF
->getFunction().getParent();
5047 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) != MVT::i32
||
5048 !Subtarget
->isSecurePlt() || !Subtarget
->isTargetELF() ||
5049 Mod
->getPICLevel() == PICLevel::SmallPIC
)
5051 // Attach global base pointer on GETtlsADDR32 node in order to
5052 // generate secure plt code for TLS symbols.
5055 case PPCISD::CALL
: {
5056 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) != MVT::i32
||
5057 !TM
.isPositionIndependent() || !Subtarget
->isSecurePlt() ||
5058 !Subtarget
->isTargetELF())
5061 SDValue Op
= N
->getOperand(1);
5063 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Op
)) {
5064 if (GA
->getTargetFlags() == PPCII::MO_PLT
)
5067 else if (ExternalSymbolSDNode
*ES
= dyn_cast
<ExternalSymbolSDNode
>(Op
)) {
5068 if (ES
->getTargetFlags() == PPCII::MO_PLT
)
5074 case PPCISD::GlobalBaseReg
:
5075 ReplaceNode(N
, getGlobalBaseReg());
5078 case ISD::FrameIndex
:
5079 selectFrameIndex(N
, N
);
5082 case PPCISD::MFOCRF
: {
5083 SDValue InFlag
= N
->getOperand(1);
5084 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
,
5085 N
->getOperand(0), InFlag
));
5089 case PPCISD::READ_TIME_BASE
:
5090 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ReadTB
, dl
, MVT::i32
, MVT::i32
,
5091 MVT::Other
, N
->getOperand(0)));
5094 case PPCISD::SRA_ADDZE
: {
5095 SDValue N0
= N
->getOperand(0);
5097 CurDAG
->getTargetConstant(*cast
<ConstantSDNode
>(N
->getOperand(1))->
5098 getConstantIntValue(), dl
,
5099 N
->getValueType(0));
5100 if (N
->getValueType(0) == MVT::i64
) {
5102 CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, MVT::Glue
,
5104 CurDAG
->SelectNodeTo(N
, PPC::ADDZE8
, MVT::i64
, SDValue(Op
, 0),
5108 assert(N
->getValueType(0) == MVT::i32
&&
5109 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5111 CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, MVT::Glue
,
5113 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
, SDValue(Op
, 0),
5120 // Change TLS initial-exec D-form stores to X-form stores.
5121 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
5122 if (EnableTLSOpt
&& Subtarget
->isELFv2ABI() &&
5123 ST
->getAddressingMode() != ISD::PRE_INC
)
5124 if (tryTLSXFormStore(ST
))
5129 // Handle preincrement loads.
5130 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
5131 EVT LoadedVT
= LD
->getMemoryVT();
5133 // Normal loads are handled by code generated from the .td file.
5134 if (LD
->getAddressingMode() != ISD::PRE_INC
) {
5135 // Change TLS initial-exec D-form loads to X-form loads.
5136 if (EnableTLSOpt
&& Subtarget
->isELFv2ABI())
5137 if (tryTLSXFormLoad(LD
))
5142 SDValue Offset
= LD
->getOffset();
5143 if (Offset
.getOpcode() == ISD::TargetConstant
||
5144 Offset
.getOpcode() == ISD::TargetGlobalAddress
) {
5147 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
5148 if (LD
->getValueType(0) != MVT::i64
) {
5149 // Handle PPC32 integer and normal FP loads.
5150 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
5151 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
5152 default: llvm_unreachable("Invalid PPC load type!");
5153 case MVT::f64
: Opcode
= PPC::LFDU
; break;
5154 case MVT::f32
: Opcode
= PPC::LFSU
; break;
5155 case MVT::i32
: Opcode
= PPC::LWZU
; break;
5156 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU
: PPC::LHZU
; break;
5158 case MVT::i8
: Opcode
= PPC::LBZU
; break;
5161 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
5162 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
5163 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
5164 default: llvm_unreachable("Invalid PPC load type!");
5165 case MVT::i64
: Opcode
= PPC::LDU
; break;
5166 case MVT::i32
: Opcode
= PPC::LWZU8
; break;
5167 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU8
: PPC::LHZU8
; break;
5169 case MVT::i8
: Opcode
= PPC::LBZU8
; break;
5173 SDValue Chain
= LD
->getChain();
5174 SDValue Base
= LD
->getBasePtr();
5175 SDValue Ops
[] = { Offset
, Base
, Chain
};
5176 SDNode
*MN
= CurDAG
->getMachineNode(
5177 Opcode
, dl
, LD
->getValueType(0),
5178 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
5179 transferMemOperands(N
, MN
);
5184 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
5185 if (LD
->getValueType(0) != MVT::i64
) {
5186 // Handle PPC32 integer and normal FP loads.
5187 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
5188 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
5189 default: llvm_unreachable("Invalid PPC load type!");
5190 case MVT::f64
: Opcode
= PPC::LFDUX
; break;
5191 case MVT::f32
: Opcode
= PPC::LFSUX
; break;
5192 case MVT::i32
: Opcode
= PPC::LWZUX
; break;
5193 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX
: PPC::LHZUX
; break;
5195 case MVT::i8
: Opcode
= PPC::LBZUX
; break;
5198 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
5199 assert((!isSExt
|| LoadedVT
== MVT::i16
|| LoadedVT
== MVT::i32
) &&
5200 "Invalid sext update load");
5201 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
5202 default: llvm_unreachable("Invalid PPC load type!");
5203 case MVT::i64
: Opcode
= PPC::LDUX
; break;
5204 case MVT::i32
: Opcode
= isSExt
? PPC::LWAUX
: PPC::LWZUX8
; break;
5205 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX8
: PPC::LHZUX8
; break;
5207 case MVT::i8
: Opcode
= PPC::LBZUX8
; break;
5211 SDValue Chain
= LD
->getChain();
5212 SDValue Base
= LD
->getBasePtr();
5213 SDValue Ops
[] = { Base
, Offset
, Chain
};
5214 SDNode
*MN
= CurDAG
->getMachineNode(
5215 Opcode
, dl
, LD
->getValueType(0),
5216 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
5217 transferMemOperands(N
, MN
);
5224 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5225 if (tryAsSingleRLWINM(N
) || tryAsSingleRLWIMI(N
) || tryAsSingleRLDICL(N
) ||
5226 tryAsSingleRLDICR(N
) || tryAsSingleRLWINM8(N
) || tryAsPairOfRLDICL(N
))
5229 // Other cases are autogenerated.
5232 if (N
->getValueType(0) == MVT::i32
)
5233 if (tryBitfieldInsert(N
))
5237 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
5238 isIntS16Immediate(N
->getOperand(1), Imm
)) {
5239 KnownBits LHSKnown
= CurDAG
->computeKnownBits(N
->getOperand(0));
5241 // If this is equivalent to an add, then we can fold it with the
5242 // FrameIndex calculation.
5243 if ((LHSKnown
.Zero
.getZExtValue()|~(uint64_t)Imm
) == ~0ULL) {
5244 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
5249 // If this is 'or' against an imm with consecutive ones and both sides zero,
5250 // try to emit rldimi
5251 if (tryAsSingleRLDIMI(N
))
5254 // OR with a 32-bit immediate can be handled by ori + oris
5255 // without creating an immediate in a GPR.
5257 bool IsPPC64
= Subtarget
->isPPC64();
5258 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
5259 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
5260 // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
5261 uint64_t ImmHi
= Imm64
>> 16;
5262 uint64_t ImmLo
= Imm64
& 0xFFFF;
5263 if (ImmHi
!= 0 && ImmLo
!= 0) {
5264 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
5266 getI16Imm(ImmLo
, dl
));
5267 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
5268 CurDAG
->SelectNodeTo(N
, PPC::ORIS8
, MVT::i64
, Ops1
);
5273 // Other cases are autogenerated.
5277 // XOR with a 32-bit immediate can be handled by xori + xoris
5278 // without creating an immediate in a GPR.
5280 bool IsPPC64
= Subtarget
->isPPC64();
5281 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
5282 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
5283 // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
5284 uint64_t ImmHi
= Imm64
>> 16;
5285 uint64_t ImmLo
= Imm64
& 0xFFFF;
5286 if (ImmHi
!= 0 && ImmLo
!= 0) {
5287 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
5289 getI16Imm(ImmLo
, dl
));
5290 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
5291 CurDAG
->SelectNodeTo(N
, PPC::XORIS8
, MVT::i64
, Ops1
);
5300 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
5301 isIntS16Immediate(N
->getOperand(1), Imm
)) {
5302 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
5309 unsigned Imm
, SH
, MB
, ME
;
5310 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
5311 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
5312 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
5313 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
5314 getI32Imm(ME
, dl
) };
5315 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
5319 // Other cases are autogenerated.
5323 unsigned Imm
, SH
, MB
, ME
;
5324 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
5325 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
5326 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
5327 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
5328 getI32Imm(ME
, dl
) };
5329 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
5333 // Other cases are autogenerated.
5337 SDValue Op1
= N
->getOperand(1);
5338 if (Op1
.getOpcode() != ISD::Constant
|| Op1
.getValueType() != MVT::i64
)
5341 // If the multiplier fits int16, we can handle it with mulli.
5342 int64_t Imm
= cast
<ConstantSDNode
>(Op1
)->getZExtValue();
5343 unsigned Shift
= countTrailingZeros
<uint64_t>(Imm
);
5344 if (isInt
<16>(Imm
) || !Shift
)
5347 // If the shifted value fits int16, we can do this transformation:
5348 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5349 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5350 uint64_t ImmSh
= Imm
>> Shift
;
5351 if (isInt
<16>(ImmSh
)) {
5352 uint64_t SextImm
= SignExtend64(ImmSh
& 0xFFFF, 16);
5353 SDValue SDImm
= CurDAG
->getTargetConstant(SextImm
, dl
, MVT::i64
);
5354 SDNode
*MulNode
= CurDAG
->getMachineNode(PPC::MULLI8
, dl
, MVT::i64
,
5355 N
->getOperand(0), SDImm
);
5356 CurDAG
->SelectNodeTo(N
, PPC::RLDICR
, MVT::i64
, SDValue(MulNode
, 0),
5357 getI32Imm(Shift
, dl
), getI32Imm(63 - Shift
, dl
));
5362 // FIXME: Remove this once the ANDI glue bug is fixed:
5363 case PPCISD::ANDI_rec_1_EQ_BIT
:
5364 case PPCISD::ANDI_rec_1_GT_BIT
: {
5368 EVT InVT
= N
->getOperand(0).getValueType();
5369 assert((InVT
== MVT::i64
|| InVT
== MVT::i32
) &&
5370 "Invalid input type for ANDI_rec_1_EQ_BIT");
5372 unsigned Opcode
= (InVT
== MVT::i64
) ? PPC::ANDI8_rec
: PPC::ANDI_rec
;
5373 SDValue
AndI(CurDAG
->getMachineNode(Opcode
, dl
, InVT
, MVT::Glue
,
5375 CurDAG
->getTargetConstant(1, dl
, InVT
)),
5377 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
5378 SDValue SRIdxVal
= CurDAG
->getTargetConstant(
5379 N
->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT
? PPC::sub_eq
: PPC::sub_gt
,
5382 CurDAG
->SelectNodeTo(N
, TargetOpcode::EXTRACT_SUBREG
, MVT::i1
, CR0Reg
,
5383 SRIdxVal
, SDValue(AndI
.getNode(), 1) /* glue */);
5386 case ISD::SELECT_CC
: {
5387 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(4))->get();
5389 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
5390 bool isPPC64
= (PtrVT
== MVT::i64
);
5392 // If this is a select of i1 operands, we'll pattern match it.
5393 if (Subtarget
->useCRBits() && N
->getOperand(0).getValueType() == MVT::i1
)
5396 if (Subtarget
->isISA3_0() && Subtarget
->isPPC64()) {
5397 bool NeedSwapOps
= false;
5398 bool IsUnCmp
= false;
5399 if (mayUseP9Setb(N
, CC
, CurDAG
, NeedSwapOps
, IsUnCmp
)) {
5400 SDValue LHS
= N
->getOperand(0);
5401 SDValue RHS
= N
->getOperand(1);
5403 std::swap(LHS
, RHS
);
5405 // Make use of SelectCC to generate the comparison to set CR bits, for
5406 // equality comparisons having one literal operand, SelectCC probably
5407 // doesn't need to materialize the whole literal and just use xoris to
5408 // check it first, it leads the following comparison result can't
5409 // exactly represent GT/LT relationship. So to avoid this we specify
5410 // SETGT/SETUGT here instead of SETEQ.
5412 SelectCC(LHS
, RHS
, IsUnCmp
? ISD::SETUGT
: ISD::SETGT
, dl
);
5413 CurDAG
->SelectNodeTo(
5414 N
, N
->getSimpleValueType(0) == MVT::i64
? PPC::SETB8
: PPC::SETB
,
5415 N
->getValueType(0), GenCC
);
5421 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5423 if (ConstantSDNode
*N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1)))
5424 if (ConstantSDNode
*N2C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(2)))
5425 if (ConstantSDNode
*N3C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(3)))
5426 if (N1C
->isNullValue() && N3C
->isNullValue() &&
5427 N2C
->getZExtValue() == 1ULL && CC
== ISD::SETNE
&&
5428 // FIXME: Implement this optzn for PPC64.
5429 N
->getValueType(0) == MVT::i32
) {
5431 CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
5432 N
->getOperand(0), getI32Imm(~0U, dl
));
5433 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(Tmp
, 0),
5434 N
->getOperand(0), SDValue(Tmp
, 1));
5438 SDValue CCReg
= SelectCC(N
->getOperand(0), N
->getOperand(1), CC
, dl
);
5440 if (N
->getValueType(0) == MVT::i1
) {
5441 // An i1 select is: (c & t) | (!c & f).
5443 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
5447 default: llvm_unreachable("Invalid CC index");
5448 case 0: SRI
= PPC::sub_lt
; break;
5449 case 1: SRI
= PPC::sub_gt
; break;
5450 case 2: SRI
= PPC::sub_eq
; break;
5451 case 3: SRI
= PPC::sub_un
; break;
5454 SDValue CCBit
= CurDAG
->getTargetExtractSubreg(SRI
, dl
, MVT::i1
, CCReg
);
5456 SDValue
NotCCBit(CurDAG
->getMachineNode(PPC::CRNOR
, dl
, MVT::i1
,
5458 SDValue C
= Inv
? NotCCBit
: CCBit
,
5459 NotC
= Inv
? CCBit
: NotCCBit
;
5461 SDValue
CAndT(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
5462 C
, N
->getOperand(2)), 0);
5463 SDValue
NotCAndF(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
5464 NotC
, N
->getOperand(3)), 0);
5466 CurDAG
->SelectNodeTo(N
, PPC::CROR
, MVT::i1
, CAndT
, NotCAndF
);
5471 getPredicateForSetCC(CC
, N
->getOperand(0).getValueType(), Subtarget
);
5473 unsigned SelectCCOp
;
5474 if (N
->getValueType(0) == MVT::i32
)
5475 SelectCCOp
= PPC::SELECT_CC_I4
;
5476 else if (N
->getValueType(0) == MVT::i64
)
5477 SelectCCOp
= PPC::SELECT_CC_I8
;
5478 else if (N
->getValueType(0) == MVT::f32
) {
5479 if (Subtarget
->hasP8Vector())
5480 SelectCCOp
= PPC::SELECT_CC_VSSRC
;
5481 else if (Subtarget
->hasSPE())
5482 SelectCCOp
= PPC::SELECT_CC_SPE4
;
5484 SelectCCOp
= PPC::SELECT_CC_F4
;
5485 } else if (N
->getValueType(0) == MVT::f64
) {
5486 if (Subtarget
->hasVSX())
5487 SelectCCOp
= PPC::SELECT_CC_VSFRC
;
5488 else if (Subtarget
->hasSPE())
5489 SelectCCOp
= PPC::SELECT_CC_SPE
;
5491 SelectCCOp
= PPC::SELECT_CC_F8
;
5492 } else if (N
->getValueType(0) == MVT::f128
)
5493 SelectCCOp
= PPC::SELECT_CC_F16
;
5494 else if (Subtarget
->hasSPE())
5495 SelectCCOp
= PPC::SELECT_CC_SPE
;
5496 else if (N
->getValueType(0) == MVT::v2f64
||
5497 N
->getValueType(0) == MVT::v2i64
)
5498 SelectCCOp
= PPC::SELECT_CC_VSRC
;
5500 SelectCCOp
= PPC::SELECT_CC_VRRC
;
5502 SDValue Ops
[] = { CCReg
, N
->getOperand(2), N
->getOperand(3),
5503 getI32Imm(BROpc
, dl
) };
5504 CurDAG
->SelectNodeTo(N
, SelectCCOp
, N
->getValueType(0), Ops
);
5507 case ISD::VECTOR_SHUFFLE
:
5508 if (Subtarget
->hasVSX() && (N
->getValueType(0) == MVT::v2f64
||
5509 N
->getValueType(0) == MVT::v2i64
)) {
5510 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(N
);
5512 SDValue Op1
= N
->getOperand(SVN
->getMaskElt(0) < 2 ? 0 : 1),
5513 Op2
= N
->getOperand(SVN
->getMaskElt(1) < 2 ? 0 : 1);
5516 for (int i
= 0; i
< 2; ++i
)
5517 if (SVN
->getMaskElt(i
) <= 0 || SVN
->getMaskElt(i
) == 2)
5522 if (Op1
== Op2
&& DM
[0] == 0 && DM
[1] == 0 &&
5523 Op1
.getOpcode() == ISD::SCALAR_TO_VECTOR
&&
5524 isa
<LoadSDNode
>(Op1
.getOperand(0))) {
5525 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op1
.getOperand(0));
5526 SDValue Base
, Offset
;
5528 if (LD
->isUnindexed() && LD
->hasOneUse() && Op1
.hasOneUse() &&
5529 (LD
->getMemoryVT() == MVT::f64
||
5530 LD
->getMemoryVT() == MVT::i64
) &&
5531 SelectAddrIdxOnly(LD
->getBasePtr(), Base
, Offset
)) {
5532 SDValue Chain
= LD
->getChain();
5533 SDValue Ops
[] = { Base
, Offset
, Chain
};
5534 MachineMemOperand
*MemOp
= LD
->getMemOperand();
5535 SDNode
*NewN
= CurDAG
->SelectNodeTo(N
, PPC::LXVDSX
,
5536 N
->getValueType(0), Ops
);
5537 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(NewN
), {MemOp
});
5542 // For little endian, we must swap the input operands and adjust
5543 // the mask elements (reverse and invert them).
5544 if (Subtarget
->isLittleEndian()) {
5545 std::swap(Op1
, Op2
);
5546 unsigned tmp
= DM
[0];
5551 SDValue DMV
= CurDAG
->getTargetConstant(DM
[1] | (DM
[0] << 1), dl
,
5553 SDValue Ops
[] = { Op1
, Op2
, DMV
};
5554 CurDAG
->SelectNodeTo(N
, PPC::XXPERMDI
, N
->getValueType(0), Ops
);
5561 bool IsPPC64
= Subtarget
->isPPC64();
5562 SDValue Ops
[] = { N
->getOperand(1), N
->getOperand(0) };
5563 CurDAG
->SelectNodeTo(N
, N
->getOpcode() == PPCISD::BDNZ
5564 ? (IsPPC64
? PPC::BDNZ8
: PPC::BDNZ
)
5565 : (IsPPC64
? PPC::BDZ8
: PPC::BDZ
),
5569 case PPCISD::COND_BRANCH
: {
5570 // Op #0 is the Chain.
5571 // Op #1 is the PPC::PRED_* number.
5573 // Op #3 is the Dest MBB
5574 // Op #4 is the Flag.
5575 // Prevent PPC::PRED_* from being selected into LI.
5576 unsigned PCC
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
5577 if (EnableBranchHint
)
5578 PCC
|= getBranchHint(PCC
, *FuncInfo
, N
->getOperand(3));
5580 SDValue Pred
= getI32Imm(PCC
, dl
);
5581 SDValue Ops
[] = { Pred
, N
->getOperand(2), N
->getOperand(3),
5582 N
->getOperand(0), N
->getOperand(4) };
5583 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
5587 if (tryFoldSWTestBRCC(N
))
5589 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(1))->get();
5591 getPredicateForSetCC(CC
, N
->getOperand(2).getValueType(), Subtarget
);
5593 if (N
->getOperand(2).getValueType() == MVT::i1
) {
5597 default: llvm_unreachable("Unexpected Boolean-operand predicate");
5598 case PPC::PRED_LT
: Opc
= PPC::CRANDC
; Swap
= true; break;
5599 case PPC::PRED_LE
: Opc
= PPC::CRORC
; Swap
= true; break;
5600 case PPC::PRED_EQ
: Opc
= PPC::CREQV
; Swap
= false; break;
5601 case PPC::PRED_GE
: Opc
= PPC::CRORC
; Swap
= false; break;
5602 case PPC::PRED_GT
: Opc
= PPC::CRANDC
; Swap
= false; break;
5603 case PPC::PRED_NE
: Opc
= PPC::CRXOR
; Swap
= false; break;
5606 // A signed comparison of i1 values produces the opposite result to an
5607 // unsigned one if the condition code includes less-than or greater-than.
5608 // This is because 1 is the most negative signed i1 number and the most
5609 // positive unsigned i1 number. The CR-logical operations used for such
5610 // comparisons are non-commutative so for signed comparisons vs. unsigned
5611 // ones, the input operands just need to be swapped.
5612 if (ISD::isSignedIntSetCC(CC
))
5615 SDValue
BitComp(CurDAG
->getMachineNode(Opc
, dl
, MVT::i1
,
5616 N
->getOperand(Swap
? 3 : 2),
5617 N
->getOperand(Swap
? 2 : 3)), 0);
5618 CurDAG
->SelectNodeTo(N
, PPC::BC
, MVT::Other
, BitComp
, N
->getOperand(4),
5623 if (EnableBranchHint
)
5624 PCC
|= getBranchHint(PCC
, *FuncInfo
, N
->getOperand(4));
5626 SDValue CondCode
= SelectCC(N
->getOperand(2), N
->getOperand(3), CC
, dl
);
5627 SDValue Ops
[] = { getI32Imm(PCC
, dl
), CondCode
,
5628 N
->getOperand(4), N
->getOperand(0) };
5629 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
5633 // FIXME: Should custom lower this.
5634 SDValue Chain
= N
->getOperand(0);
5635 SDValue Target
= N
->getOperand(1);
5636 unsigned Opc
= Target
.getValueType() == MVT::i32
? PPC::MTCTR
: PPC::MTCTR8
;
5637 unsigned Reg
= Target
.getValueType() == MVT::i32
? PPC::BCTR
: PPC::BCTR8
;
5638 Chain
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Glue
, Target
,
5640 CurDAG
->SelectNodeTo(N
, Reg
, MVT::Other
, Chain
);
5643 case PPCISD::TOC_ENTRY
: {
5644 const bool isPPC64
= Subtarget
->isPPC64();
5645 const bool isELFABI
= Subtarget
->isSVR4ABI();
5646 const bool isAIXABI
= Subtarget
->isAIXABI();
5648 // PowerPC only supports the small, medium and large code models.
5649 const CodeModel::Model CModel
= TM
.getCodeModel();
5650 assert(!(CModel
== CodeModel::Tiny
|| CModel
== CodeModel::Kernel
) &&
5651 "PowerPC doesn't support tiny or kernel code models.");
5653 if (isAIXABI
&& CModel
== CodeModel::Medium
)
5654 report_fatal_error("Medium code model is not supported on AIX.");
5656 // For 64-bit small code model, we allow SelectCodeCommon to handle this,
5657 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
5658 if (isPPC64
&& CModel
== CodeModel::Small
)
5661 // Handle 32-bit small code model.
5663 // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
5664 // PPC::ADDItoc, or PPC::LWZtoc
5665 auto replaceWith
= [this, &dl
](unsigned OpCode
, SDNode
*TocEntry
) {
5666 SDValue GA
= TocEntry
->getOperand(0);
5667 SDValue TocBase
= TocEntry
->getOperand(1);
5668 SDNode
*MN
= CurDAG
->getMachineNode(OpCode
, dl
, MVT::i32
, GA
, TocBase
);
5669 transferMemOperands(TocEntry
, MN
);
5670 ReplaceNode(TocEntry
, MN
);
5674 assert(TM
.isPositionIndependent() &&
5675 "32-bit ELF can only have TOC entries in position independent"
5677 // 32-bit ELF always uses a small code model toc access.
5678 replaceWith(PPC::LWZtoc
, N
);
5682 if (isAIXABI
&& CModel
== CodeModel::Small
) {
5683 if (hasTocDataAttr(N
->getOperand(0),
5684 CurDAG
->getDataLayout().getPointerSize()))
5685 replaceWith(PPC::ADDItoc
, N
);
5687 replaceWith(PPC::LWZtoc
, N
);
5693 assert(CModel
!= CodeModel::Small
&& "All small code models handled.");
5695 assert((isPPC64
|| (isAIXABI
&& !isPPC64
)) && "We are dealing with 64-bit"
5696 " ELF/AIX or 32-bit AIX in the following.");
5698 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
5699 // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
5700 // generate two instructions as described below. The first source operand
5701 // is a symbol reference. If it must be toc-referenced according to
5702 // Subtarget, we generate:
5704 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
5706 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
5707 // Otherwise we generate:
5708 // ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
5709 SDValue GA
= N
->getOperand(0);
5710 SDValue TOCbase
= N
->getOperand(1);
5712 EVT VT
= isPPC64
? MVT::i64
: MVT::i32
;
5713 SDNode
*Tmp
= CurDAG
->getMachineNode(
5714 isPPC64
? PPC::ADDIStocHA8
: PPC::ADDIStocHA
, dl
, VT
, TOCbase
, GA
);
5716 if (PPCLowering
->isAccessedAsGotIndirect(GA
)) {
5717 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
5719 SDNode
*MN
= CurDAG
->getMachineNode(
5720 isPPC64
? PPC::LDtocL
: PPC::LWZtocL
, dl
, VT
, GA
, SDValue(Tmp
, 0));
5722 transferMemOperands(N
, MN
);
5727 // Build the address relative to the TOC-pointer.
5728 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ADDItocL
, dl
, MVT::i64
,
5729 SDValue(Tmp
, 0), GA
));
5732 case PPCISD::PPC32_PICGOT
:
5733 // Generate a PIC-safe GOT reference.
5734 assert(Subtarget
->is32BitELFABI() &&
5735 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
5736 CurDAG
->SelectNodeTo(N
, PPC::PPC32PICGOT
,
5737 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()),
5741 case PPCISD::VADD_SPLAT
: {
5742 // This expands into one of three sequences, depending on whether
5743 // the first operand is odd or even, positive or negative.
5744 assert(isa
<ConstantSDNode
>(N
->getOperand(0)) &&
5745 isa
<ConstantSDNode
>(N
->getOperand(1)) &&
5746 "Invalid operand on VADD_SPLAT!");
5748 int Elt
= N
->getConstantOperandVal(0);
5749 int EltSize
= N
->getConstantOperandVal(1);
5750 unsigned Opc1
, Opc2
, Opc3
;
5754 Opc1
= PPC::VSPLTISB
;
5755 Opc2
= PPC::VADDUBM
;
5756 Opc3
= PPC::VSUBUBM
;
5758 } else if (EltSize
== 2) {
5759 Opc1
= PPC::VSPLTISH
;
5760 Opc2
= PPC::VADDUHM
;
5761 Opc3
= PPC::VSUBUHM
;
5764 assert(EltSize
== 4 && "Invalid element size on VADD_SPLAT!");
5765 Opc1
= PPC::VSPLTISW
;
5766 Opc2
= PPC::VADDUWM
;
5767 Opc3
= PPC::VSUBUWM
;
5771 if ((Elt
& 1) == 0) {
5772 // Elt is even, in the range [-32,-18] + [16,30].
5774 // Convert: VADD_SPLAT elt, size
5775 // Into: tmp = VSPLTIS[BHW] elt
5776 // VADDU[BHW]M tmp, tmp
5777 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
5778 SDValue EltVal
= getI32Imm(Elt
>> 1, dl
);
5779 SDNode
*Tmp
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5780 SDValue TmpVal
= SDValue(Tmp
, 0);
5781 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, TmpVal
, TmpVal
));
5783 } else if (Elt
> 0) {
5784 // Elt is odd and positive, in the range [17,31].
5786 // Convert: VADD_SPLAT elt, size
5787 // Into: tmp1 = VSPLTIS[BHW] elt-16
5788 // tmp2 = VSPLTIS[BHW] -16
5789 // VSUBU[BHW]M tmp1, tmp2
5790 SDValue EltVal
= getI32Imm(Elt
- 16, dl
);
5791 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5792 EltVal
= getI32Imm(-16, dl
);
5793 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5794 ReplaceNode(N
, CurDAG
->getMachineNode(Opc3
, dl
, VT
, SDValue(Tmp1
, 0),
5798 // Elt is odd and negative, in the range [-31,-17].
5800 // Convert: VADD_SPLAT elt, size
5801 // Into: tmp1 = VSPLTIS[BHW] elt+16
5802 // tmp2 = VSPLTIS[BHW] -16
5803 // VADDU[BHW]M tmp1, tmp2
5804 SDValue EltVal
= getI32Imm(Elt
+ 16, dl
);
5805 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5806 EltVal
= getI32Imm(-16, dl
);
5807 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
5808 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, SDValue(Tmp1
, 0),
5818 // If the target supports the cmpb instruction, do the idiom recognition here.
5819 // We don't do this as a DAG combine because we don't want to do it as nodes
5820 // are being combined (because we might miss part of the eventual idiom). We
5821 // don't want to do it during instruction selection because we want to reuse
5822 // the logic for lowering the masking operations already part of the
5823 // instruction selector.
//
// Summary: N must be an ISD::OR of type i32 or i64 on a subtarget with CMPB.
// The OR tree rooted at N is walked; each non-OR operand has to be a SELECT_CC
// that compares a single byte of one common LHS/RHS pair and selects between
// two constants confined to that byte. When the idiom is recognized the result
// is built as PPCISD::CMPB(LHS, RHS), then combined with the constants Mask
// and Alt through AND (and XOR) nodes.
// NOTE(review): the early-exit statements and the final return are not visible
// in this listing; presumably an empty SDValue is returned when nothing
// matches -- confirm against the caller (PreprocessISelDAG).
5824 SDValue
PPCDAGToDAGISel::combineToCMPB(SDNode
*N
) {
5827 assert(N
->getOpcode() == ISD::OR
&&
5828 "Only OR nodes are supported for CMPB");
// Bail-out conditions: no CMPB support, or a result type other than i32/i64.
5831 if (!Subtarget
->hasCMPB())
5834 if (N
->getValueType(0) != MVT::i32
&&
5835 N
->getValueType(0) != MVT::i64
)
5838 EVT VT
= N
->getValueType(0);
// BytesFound[b] is set once a comparison for byte b has been matched. Mask and
// Alt are used at the end as the constants applied to the CMPB result.
5841 bool BytesFound
[8] = {false, false, false, false, false, false, false, false};
5842 uint64_t Mask
= 0, Alt
= 0;
// IsByteSelectCC: recognizes a SELECT_CC whose operands 2 and 3 are constants
// that both fit inside one byte lane (0xFF << 8*b). On a match, b names the
// byte lane and LHS/RHS receive the two values being compared.
5844 auto IsByteSelectCC
= [this](SDValue O
, unsigned &b
,
5845 uint64_t &Mask
, uint64_t &Alt
,
5846 SDValue
&LHS
, SDValue
&RHS
) {
5847 if (O
.getOpcode() != ISD::SELECT_CC
)
5849 ISD::CondCode CC
= cast
<CondCodeSDNode
>(O
.getOperand(4))->get();
5851 if (!isa
<ConstantSDNode
>(O
.getOperand(2)) ||
5852 !isa
<ConstantSDNode
>(O
.getOperand(3)))
5855 uint64_t PM
= O
.getConstantOperandVal(2);
5856 uint64_t PAlt
= O
.getConstantOperandVal(3);
// Find the byte lane b that contains every set bit of both PM and PAlt.
// NOTE: the local Mask declared in the loop body shadows the lambda's Mask
// reference parameter within that scope.
5857 for (b
= 0; b
< 8; ++b
) {
5858 uint64_t Mask
= UINT64_C(0xFF) << (8*b
);
5859 if (PM
&& (PM
& Mask
) == PM
&& (PAlt
& Mask
) == PAlt
)
// Case 1: operand 1 is not the constant zero, so operands 0 and 1 are compared
// directly. Truncates are looked through on both sides.
5868 if (!isa
<ConstantSDNode
>(O
.getOperand(1)) ||
5869 O
.getConstantOperandVal(1) != 0) {
5870 SDValue Op0
= O
.getOperand(0), Op1
= O
.getOperand(1);
5871 if (Op0
.getOpcode() == ISD::TRUNCATE
)
5872 Op0
= Op0
.getOperand(0);
5873 if (Op1
.getOpcode() == ISD::TRUNCATE
)
5874 Op1
= Op1
.getOperand(0);
// Both sides shifted right by the same constant amount and compared with SETEQ;
// the shift amount is checked against Bits-8 (i.e. only the top byte remains).
5876 if (Op0
.getOpcode() == ISD::SRL
&& Op1
.getOpcode() == ISD::SRL
&&
5877 Op0
.getOperand(1) == Op1
.getOperand(1) && CC
== ISD::SETEQ
&&
5878 isa
<ConstantSDNode
>(Op0
.getOperand(1))) {
5880 unsigned Bits
= Op0
.getValueSizeInBits();
5883 if (Op0
.getConstantOperandVal(1) != Bits
-8)
5886 LHS
= Op0
.getOperand(0);
5887 RHS
= Op1
.getOperand(0);
5891 // When we have small integers (i16 to be specific), the form present
5892 // post-legalization uses SETULT in the SELECT_CC for the
5893 // higher-order byte, depending on the fact that the
5894 // even-higher-order bytes are known to all be zero, for example:
5895 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
5896 // (so when the second byte is the same, because all higher-order
5897 // bits from bytes 3 and 4 are known to be zero, the result of the
5898 // xor can be at most 255)
5899 if (Op0
.getOpcode() == ISD::XOR
&& CC
== ISD::SETULT
&&
5900 isa
<ConstantSDNode
>(O
.getOperand(1))) {
5902 uint64_t ULim
= O
.getConstantOperandVal(1);
5903 if (ULim
!= (UINT64_C(1) << b
*8))
5906 // Now we need to make sure that the upper bytes are known to be
5908 unsigned Bits
= Op0
.getValueSizeInBits();
5909 if (!CurDAG
->MaskedValueIsZero(
5910 Op0
, APInt::getHighBitsSet(Bits
, Bits
- (b
+ 1) * 8)))
5913 LHS
= Op0
.getOperand(0);
5914 RHS
= Op0
.getOperand(1);
// Case 2 (reached for a comparison against zero, judging by the check at 5868):
// only SETEQ is handled, and operand 0 must be an XOR isolated to one byte by
// either an AND with 0xFF << 8*b or an SRL by Bits-8.
5921 if (CC
!= ISD::SETEQ
)
5924 SDValue Op
= O
.getOperand(0);
5925 if (Op
.getOpcode() == ISD::AND
) {
5926 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
5928 if (Op
.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b
)))
5931 SDValue XOR
= Op
.getOperand(0);
5932 if (XOR
.getOpcode() == ISD::TRUNCATE
)
5933 XOR
= XOR
.getOperand(0);
5934 if (XOR
.getOpcode() != ISD::XOR
)
5937 LHS
= XOR
.getOperand(0);
5938 RHS
= XOR
.getOperand(1);
5940 } else if (Op
.getOpcode() == ISD::SRL
) {
5941 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
5943 unsigned Bits
= Op
.getValueSizeInBits();
5946 if (Op
.getConstantOperandVal(1) != Bits
-8)
5949 SDValue XOR
= Op
.getOperand(0);
5950 if (XOR
.getOpcode() == ISD::TRUNCATE
)
5951 XOR
= XOR
.getOperand(0);
5952 if (XOR
.getOpcode() != ISD::XOR
)
5955 LHS
= XOR
.getOperand(0);
5956 RHS
= XOR
.getOperand(1);
// Worklist walk starting at N: each operand is either another OR or must be
// accepted by IsByteSelectCC. A matched byte is recorded in BytesFound, and the
// operand pair (OLHS, ORHS) must equal (LHS, RHS) in either order.
// NOTE(review): how nested ORs are queued and how Mask/Alt are accumulated is
// not visible in this listing.
5963 SmallVector
<SDValue
, 8> Queue(1, SDValue(N
, 0));
5964 while (!Queue
.empty()) {
5965 SDValue V
= Queue
.pop_back_val();
5967 for (const SDValue
&O
: V
.getNode()->ops()) {
5969 uint64_t M
= 0, A
= 0;
5971 if (O
.getOpcode() == ISD::OR
) {
5973 } else if (IsByteSelectCC(O
, b
, M
, A
, OLHS
, ORHS
)) {
5977 BytesFound
[b
] = true;
5980 } else if ((LHS
== ORHS
&& RHS
== OLHS
) ||
5981 (RHS
== ORHS
&& LHS
== OLHS
)) {
5982 BytesFound
[b
] = true;
// Tally the matched bytes; the check at 6001 rejects the idiom when LastB is 0
// or fewer than two bytes were counted.
// NOTE(review): the test below indexes BytesFound with LastB rather than the
// loop variable i -- verify that this is intended.
5994 unsigned LastB
= 0, BCnt
= 0;
5995 for (unsigned i
= 0; i
< 8; ++i
)
5996 if (BytesFound
[LastB
]) {
6001 if (!LastB
|| BCnt
< 2)
6004 // Because we'll be zero-extending the output anyway if we don't have a
// specific
6005 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6006 if (LHS
.getValueType() != VT
) {
6007 LHS
= CurDAG
->getAnyExtOrTrunc(LHS
, dl
, VT
);
6008 RHS
= CurDAG
->getAnyExtOrTrunc(RHS
, dl
, VT
);
6011 Res
= CurDAG
->getNode(PPCISD::CMPB
, dl
, VT
, LHS
, RHS
);
// A mask of all ones needs no extra AND; with no alternate value (Alt == 0) a
// single AND with Mask suffices, otherwise the masked-merge form is emitted.
6013 bool NonTrivialMask
= ((int64_t) Mask
) != INT64_C(-1);
6014 if (NonTrivialMask
&& !Alt
) {
6015 // Res = Mask & CMPB
6016 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
6017 CurDAG
->getConstant(Mask
, dl
, VT
));
6019 // Res = (CMPB & Mask) | (~CMPB & Alt)
6020 // Which, as suggested here:
6021 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6022 // can be written as:
6023 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6024 // useful because the (Alt ^ Mask) can be pre-computed.
6025 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
6026 CurDAG
->getConstant(Mask
^ Alt
, dl
, VT
));
6027 Res
= CurDAG
->getNode(ISD::XOR
, dl
, VT
, Res
,
6028 CurDAG
->getConstant(Alt
, dl
, VT
));
6034 // When CR bit registers are enabled, an extension of an i1 variable to a i32
6035 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6036 // involves constant materialization of a 0 or a 1 or both. If the result of
6037 // the extension is then operated upon by some operator that can be constant
6038 // folded with a constant 0 or 1, and that constant can be materialized using
6039 // only one instruction (like a zero or one), then we should fold in those
6040 // operations with the select.
//
// Res: receives the select node built for the folded user.
// N:   the ZERO_EXTEND / SIGN_EXTEND / ANY_EXTEND of an i1 value being examined;
//      it must have exactly one use. It is taken by reference.
//      NOTE(review): presumably N is advanced to the folded user on each
//      iteration -- that statement is not visible in this listing.
6041 void PPCDAGToDAGISel::foldBoolExts(SDValue
&Res
, SDNode
*&N
) {
// Preconditions: CR bits in use, N is an extension, its input is i1, and N has
// a single use.
6042 if (!Subtarget
->useCRBits())
6045 if (N
->getOpcode() != ISD::ZERO_EXTEND
&&
6046 N
->getOpcode() != ISD::SIGN_EXTEND
&&
6047 N
->getOpcode() != ISD::ANY_EXTEND
)
6050 if (N
->getOperand(0).getValueType() != MVT::i1
)
6053 if (!N
->hasOneUse())
6057 EVT VT
= N
->getValueType(0);
6058 SDValue Cond
= N
->getOperand(0);
// The "true" constant is -1 for a sign extension and 1 otherwise; "false" is 0.
6060 CurDAG
->getConstant(N
->getOpcode() == ISD::SIGN_EXTEND
? -1 : 1, dl
, VT
);
6061 SDValue ConstFalse
= CurDAG
->getConstant(0, dl
, VT
);
// Only binary users are considered.
6064 SDNode
*User
= *N
->use_begin();
6065 if (User
->getNumOperands() != 2)
// TryFold: constant-folds User's operation with every operand that refers to N
// replaced by Val. The callers below treat a null or undef result as failure.
6068 auto TryFold
= [this, N
, User
, dl
](SDValue Val
) {
6069 SDValue UserO0
= User
->getOperand(0), UserO1
= User
->getOperand(1);
6070 SDValue O0
= UserO0
.getNode() == N
? Val
: UserO0
;
6071 SDValue O1
= UserO1
.getNode() == N
? Val
: UserO1
;
6073 return CurDAG
->FoldConstantArithmetic(User
->getOpcode(), dl
,
6074 User
->getValueType(0), {O0
, O1
});
6077 // FIXME: When the semantics of the interaction between select and undef
6078 // are clearly defined, it may turn out to be unnecessary to break here.
6079 SDValue TrueRes
= TryFold(ConstTrue
);
6080 if (!TrueRes
|| TrueRes
.isUndef())
6082 SDValue FalseRes
= TryFold(ConstFalse
);
6083 if (!FalseRes
|| FalseRes
.isUndef())
6086 // For us to materialize these using one instruction, we must be able to
6087 // represent them as signed 16-bit integers.
// NOTE(review): the values are read with getZExtValue() before the signed
// isInt<16> test; a negative constant of a type narrower than 64 bits would
// then appear as a large positive number and be rejected -- confirm intended.
6088 uint64_t True
= cast
<ConstantSDNode
>(TrueRes
)->getZExtValue(),
6089 False
= cast
<ConstantSDNode
>(FalseRes
)->getZExtValue();
6090 if (!isInt
<16>(True
) || !isInt
<16>(False
))
6093 // We can replace User with a new SELECT node, and try again to see if we
6094 // can fold the select with its user.
6095 Res
= CurDAG
->getSelect(dl
, User
->getValueType(0), Cond
, TrueRes
, FalseRes
);
// The folded results become the constants used for the next iteration.
6097 ConstTrue
= TrueRes
;
6098 ConstFalse
= FalseRes
;
6099 } while (N
->hasOneUse());
// Pre-selection DAG rewrite. Visits the DAG nodes from the end of the node
// list towards the beginning, dispatching on each node's opcode to
// combineToCMPB and then offering the node to foldBoolExts. When a replacement
// value Res is produced, the old and new nodes are dumped under LLVM_DEBUG and
// all uses of N's first result are redirected to Res. Dead nodes are removed
// with CurDAG->RemoveDeadNodes().
// NOTE(review): the case labels of the switch, the updates of MadeChange and
// the condition guarding RemoveDeadNodes are not visible in this listing.
6102 void PPCDAGToDAGISel::PreprocessISelDAG() {
6103 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
6105 bool MadeChange
= false;
// Position is pre-decremented, so iteration runs from the last node backwards.
6106 while (Position
!= CurDAG
->allnodes_begin()) {
6107 SDNode
*N
= &*--Position
;
6112 switch (N
->getOpcode()) {
6115 Res
= combineToCMPB(N
);
6120 foldBoolExts(Res
, N
);
6123 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6124 LLVM_DEBUG(N
->dump(CurDAG
));
6125 LLVM_DEBUG(dbgs() << "\nNew: ");
6126 LLVM_DEBUG(Res
.getNode()->dump(CurDAG
));
6127 LLVM_DEBUG(dbgs() << "\n");
6129 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), Res
);
6135 CurDAG
->RemoveDeadNodes();
6138 /// PostprocessISelDAG - Perform some late peephole optimizations
6139 /// on the DAG representation.
/// The optimization level is checked first (CodeGenOpt::None is treated
/// specially); the peepholes visible here include PeepholePPC64ZExt().
/// NOTE(review): the statement executed for CodeGenOpt::None (presumably an
/// early return) and any other peephole calls are not visible in this listing.
6140 void PPCDAGToDAGISel::PostprocessISelDAG() {
6141 // Skip peepholes at -O0.
6142 if (TM
.getOptLevel() == CodeGenOpt::None
)
6147 PeepholePPC64ZExt();
6150 // Check if all users of this node will become isel where the second operand
6151 // is the constant zero. If this is so, and if we can negate the condition,
6152 // then we can flip the true and false operands. This will allow the zero to
6153 // be folded with the isel so that we don't need to materialize a register
// holding that zero.
//
// Concretely, every user of N is required to be a SELECT_I4 or SELECT_I8
// machine node whose operand 2 is an LI/LI8 machine node carrying a
// ConstantSDNode that is zero.
// NOTE(review): the return statements are not visible in this listing;
// presumably each failed check returns false and the function returns true
// otherwise.
6155 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode
*N
) {
6156 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
// The user must already be selected, and must be an integer select.
6159 if (!User
->isMachineOpcode())
6161 if (User
->getMachineOpcode() != PPC::SELECT_I4
&&
6162 User
->getMachineOpcode() != PPC::SELECT_I8
)
6165 SDNode
*Op1
= User
->getOperand(1).getNode();
6166 SDNode
*Op2
= User
->getOperand(2).getNode();
6167 // If we have a degenerate select with two equal operands, swapping will
6168 // not do anything, and we may run into an infinite loop.
// Operand 2 must be a load-immediate of the constant zero.
6172 if (!Op2
->isMachineOpcode())
6175 if (Op2
->getMachineOpcode() != PPC::LI
&&
6176 Op2
->getMachineOpcode() != PPC::LI8
)
6179 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op2
->getOperand(0));
6183 if (!C
->isNullValue())
// Replaces every user of N with an equivalent select whose operands 1 and 2
// are exchanged. All users are asserted to be SELECT_I4 or SELECT_I8 machine
// nodes. Users are collected into ToReplace first and rewritten in a second
// loop (presumably so that the use list of N is not modified while it is being
// iterated -- confirm).
6190 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode
*N
) {
6191 SmallVector
<SDNode
*, 4> ToReplace
;
6192 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
6195 assert((User
->getMachineOpcode() == PPC::SELECT_I4
||
6196 User
->getMachineOpcode() == PPC::SELECT_I8
) &&
6197 "Must have all select users");
6198 ToReplace
.push_back(User
);
6201 for (SmallVector
<SDNode
*, 4>::iterator UI
= ToReplace
.begin(),
6202 UE
= ToReplace
.end(); UI
!= UE
; ++UI
) {
// Same opcode, location, type and condition (operand 0); operands 2 and 1 are
// passed in swapped order.
6205 CurDAG
->getMachineNode(User
->getMachineOpcode(), SDLoc(User
),
6206 User
->getValueType(0), User
->getOperand(0),
6207 User
->getOperand(2),
6208 User
->getOperand(1));
6210 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6211 LLVM_DEBUG(User
->dump(CurDAG
));
6212 LLVM_DEBUG(dbgs() << "\nNew: ");
6213 LLVM_DEBUG(ResNode
->dump(CurDAG
));
6214 LLVM_DEBUG(dbgs() << "\n");
6216 ReplaceUses(User
, ResNode
);
// Peephole over condition-register (CR) logical machine nodes, selects and
// conditional branches. For each machine node that has uses, the CR operands
// are inspected for three producers: CRSET, CRUNSET, and a CRNOR whose two
// inputs are identical. Depending on what is found, the node is replaced by
// one of its operands, by a constant CRSET/CRUNSET, or by a different CR
// operation, following the boolean identities written next to each
// replacement. When AllUsersSelectZero(MachineNode) holds, a different CR
// opcode is emitted on the same operands and SwapAllSelectUsers is applied to
// the node's users. Uses of a node whose ResNode changed are redirected with
// ReplaceUses. The scan is repeated while IsModified is set, with
// CurDAG->RemoveDeadNodes() called inside that loop.
// NOTE(review): the case labels, the assignments to the Op*Set/Op*Unset flags,
// several branch conditions and the updates of IsModified/SelectSwap are not
// visible in this listing; the descriptions above rely on the inline identity
// comments for which opcode each group of branches belongs to.
6220 void PPCDAGToDAGISel::PeepholeCROps() {
6224 for (SDNode
&Node
: CurDAG
->allnodes()) {
// Only machine nodes that still have uses are of interest.
6225 MachineSDNode
*MachineNode
= dyn_cast
<MachineSDNode
>(&Node
);
6226 if (!MachineNode
|| MachineNode
->use_empty())
6228 SDNode
*ResNode
= MachineNode
;
// Facts about the operands, consulted by the simplifications further down
// (presumably set by the producer checks that follow -- assignments not shown).
6230 bool Op1Set
= false, Op1Unset
= false,
6232 Op2Set
= false, Op2Unset
= false,
6235 unsigned Opcode
= MachineNode
->getMachineOpcode();
// Classify the producer of operand 1.
6246 SDValue Op
= MachineNode
->getOperand(1);
6247 if (Op
.isMachineOpcode()) {
6248 if (Op
.getMachineOpcode() == PPC::CRSET
)
6250 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
6252 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
6253 Op
.getOperand(0) == Op
.getOperand(1))
6260 case PPC::SELECT_I4
:
6261 case PPC::SELECT_I8
:
6262 case PPC::SELECT_F4
:
6263 case PPC::SELECT_F8
:
6264 case PPC::SELECT_SPE
:
6265 case PPC::SELECT_SPE4
:
6266 case PPC::SELECT_VRRC
:
6267 case PPC::SELECT_VSFRC
:
6268 case PPC::SELECT_VSSRC
:
6269 case PPC::SELECT_VSRC
: {
// Classify the producer of operand 0 (the operand examined for the selects
// listed above).
6270 SDValue Op
= MachineNode
->getOperand(0);
6271 if (Op
.isMachineOpcode()) {
6272 if (Op
.getMachineOpcode() == PPC::CRSET
)
6274 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
6276 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
6277 Op
.getOperand(0) == Op
.getOperand(1))
6284 bool SelectSwap
= false;
// Simplifications for a conjunction (see the "x & ..." identities below).
6288 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
6290 ResNode
= MachineNode
->getOperand(0).getNode();
6293 ResNode
= MachineNode
->getOperand(1).getNode();
6296 ResNode
= MachineNode
->getOperand(0).getNode();
6297 else if (Op1Unset
|| Op2Unset
)
6298 // x & 0 = 0 & y = 0
6299 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
6302 // ~x & y = andc(y, x)
6303 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
6304 MVT::i1
, MachineNode
->getOperand(1),
6305 MachineNode
->getOperand(0).
6308 // x & ~y = andc(x, y)
6309 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
6310 MVT::i1
, MachineNode
->getOperand(0),
6311 MachineNode
->getOperand(1).
6313 else if (AllUsersSelectZero(MachineNode
)) {
6314 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
6315 MVT::i1
, MachineNode
->getOperand(0),
6316 MachineNode
->getOperand(1));
// Simplifications for nand (see the "nand(...)" identities below).
6321 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
6322 // nand(x, x) -> nor(x, x)
6323 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6324 MVT::i1
, MachineNode
->getOperand(0),
6325 MachineNode
->getOperand(0));
6327 // nand(1, y) -> nor(y, y)
6328 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6329 MVT::i1
, MachineNode
->getOperand(1),
6330 MachineNode
->getOperand(1));
6332 // nand(x, 1) -> nor(x, x)
6333 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6334 MVT::i1
, MachineNode
->getOperand(0),
6335 MachineNode
->getOperand(0));
6336 else if (Op1Unset
|| Op2Unset
)
6337 // nand(x, 0) = nand(0, y) = 1
6338 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
6341 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6342 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
6343 MVT::i1
, MachineNode
->getOperand(0).
6345 MachineNode
->getOperand(1));
6347 // nand(x, ~y) = ~x | y = orc(y, x)
6348 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
6349 MVT::i1
, MachineNode
->getOperand(1).
6351 MachineNode
->getOperand(0));
6352 else if (AllUsersSelectZero(MachineNode
)) {
6353 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
6354 MVT::i1
, MachineNode
->getOperand(0),
6355 MachineNode
->getOperand(1));
// Simplifications for a disjunction (see the "x | ..." identities below).
6360 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
6362 ResNode
= MachineNode
->getOperand(0).getNode();
6363 else if (Op1Set
|| Op2Set
)
6364 // x | 1 = 1 | y = 1
6365 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
6369 ResNode
= MachineNode
->getOperand(1).getNode();
6372 ResNode
= MachineNode
->getOperand(0).getNode();
6374 // ~x | y = orc(y, x)
6375 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
6376 MVT::i1
, MachineNode
->getOperand(1),
6377 MachineNode
->getOperand(0).
6380 // x | ~y = orc(x, y)
6381 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
6382 MVT::i1
, MachineNode
->getOperand(0),
6383 MachineNode
->getOperand(1).
6385 else if (AllUsersSelectZero(MachineNode
)) {
6386 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6387 MVT::i1
, MachineNode
->getOperand(0),
6388 MachineNode
->getOperand(1));
// Simplifications for xor (see the "xor(...)" identities below).
6393 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
6395 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
6398 // xor(1, y) -> nor(y, y)
6399 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6400 MVT::i1
, MachineNode
->getOperand(1),
6401 MachineNode
->getOperand(1));
6403 // xor(x, 1) -> nor(x, x)
6404 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6405 MVT::i1
, MachineNode
->getOperand(0),
6406 MachineNode
->getOperand(0));
6409 ResNode
= MachineNode
->getOperand(1).getNode();
6412 ResNode
= MachineNode
->getOperand(0).getNode();
6414 // xor(~x, y) = eqv(x, y)
6415 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
6416 MVT::i1
, MachineNode
->getOperand(0).
6418 MachineNode
->getOperand(1));
6420 // xor(x, ~y) = eqv(x, y)
6421 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
6422 MVT::i1
, MachineNode
->getOperand(0),
6423 MachineNode
->getOperand(1).
6425 else if (AllUsersSelectZero(MachineNode
)) {
6426 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
6427 MVT::i1
, MachineNode
->getOperand(0),
6428 MachineNode
->getOperand(1));
// Simplifications for nor (see the "nor(...)" identities below).
6433 if (Op1Set
|| Op2Set
)
6435 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
6438 // nor(0, y) = ~y -> nor(y, y)
6439 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6440 MVT::i1
, MachineNode
->getOperand(1),
6441 MachineNode
->getOperand(1));
6444 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6445 MVT::i1
, MachineNode
->getOperand(0),
6446 MachineNode
->getOperand(0));
6448 // nor(~x, y) = andc(x, y)
6449 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
6450 MVT::i1
, MachineNode
->getOperand(0).
6452 MachineNode
->getOperand(1));
6454 // nor(x, ~y) = andc(y, x)
6455 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
6456 MVT::i1
, MachineNode
->getOperand(1).
6458 MachineNode
->getOperand(0));
6459 else if (AllUsersSelectZero(MachineNode
)) {
6460 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
6461 MVT::i1
, MachineNode
->getOperand(0),
6462 MachineNode
->getOperand(1));
// Simplifications for eqv (see the "eqv(...)" identities below).
6467 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
6469 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
6473 ResNode
= MachineNode
->getOperand(1).getNode();
6476 ResNode
= MachineNode
->getOperand(0).getNode();
6478 // eqv(0, y) = ~y -> nor(y, y)
6479 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6480 MVT::i1
, MachineNode
->getOperand(1),
6481 MachineNode
->getOperand(1));
6484 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6485 MVT::i1
, MachineNode
->getOperand(0),
6486 MachineNode
->getOperand(0));
6488 // eqv(~x, y) = xor(x, y)
6489 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
6490 MVT::i1
, MachineNode
->getOperand(0).
6492 MachineNode
->getOperand(1));
6494 // eqv(x, ~y) = xor(x, y)
6495 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
6496 MVT::i1
, MachineNode
->getOperand(0),
6497 MachineNode
->getOperand(1).
6499 else if (AllUsersSelectZero(MachineNode
)) {
6500 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
6501 MVT::i1
, MachineNode
->getOperand(0),
6502 MachineNode
->getOperand(1));
// Simplifications for andc (see the "andc(...)" identities below).
6507 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
6509 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
6513 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6514 MVT::i1
, MachineNode
->getOperand(1),
6515 MachineNode
->getOperand(1));
6516 else if (Op1Unset
|| Op2Set
)
6517 // andc(0, y) = andc(x, 1) = 0
6518 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
6522 ResNode
= MachineNode
->getOperand(0).getNode();
6524 // andc(~x, y) = ~(x | y) = nor(x, y)
6525 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6526 MVT::i1
, MachineNode
->getOperand(0).
6528 MachineNode
->getOperand(1));
6530 // andc(x, ~y) = x & y
6531 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
6532 MVT::i1
, MachineNode
->getOperand(0),
6533 MachineNode
->getOperand(1).
6535 else if (AllUsersSelectZero(MachineNode
)) {
6536 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
6537 MVT::i1
, MachineNode
->getOperand(1),
6538 MachineNode
->getOperand(0));
// Simplifications for orc (see the "orc(...)" identities below).
6543 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
6545 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
6547 else if (Op1Set
|| Op2Unset
)
6548 // orc(1, y) = orc(x, 0) = 1
6549 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
6553 ResNode
= MachineNode
->getOperand(0).getNode();
6556 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
6557 MVT::i1
, MachineNode
->getOperand(1),
6558 MachineNode
->getOperand(1));
6560 // orc(~x, y) = ~(x & y) = nand(x, y)
6561 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
6562 MVT::i1
, MachineNode
->getOperand(0).
6564 MachineNode
->getOperand(1));
6566 // orc(x, ~y) = x | y
6567 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
6568 MVT::i1
, MachineNode
->getOperand(0),
6569 MachineNode
->getOperand(1).
6571 else if (AllUsersSelectZero(MachineNode
)) {
6572 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
6573 MVT::i1
, MachineNode
->getOperand(1),
6574 MachineNode
->getOperand(0));
// Selects: the result is one of the two value operands, or a rebuilt select of
// the same opcode with operands 2 and 1 passed in exchanged order.
6578 case PPC::SELECT_I4
:
6579 case PPC::SELECT_I8
:
6580 case PPC::SELECT_F4
:
6581 case PPC::SELECT_F8
:
6582 case PPC::SELECT_SPE
:
6583 case PPC::SELECT_SPE4
:
6584 case PPC::SELECT_VRRC
:
6585 case PPC::SELECT_VSFRC
:
6586 case PPC::SELECT_VSSRC
:
6587 case PPC::SELECT_VSRC
:
6589 ResNode
= MachineNode
->getOperand(1).getNode();
6591 ResNode
= MachineNode
->getOperand(2).getNode();
6593 ResNode
= CurDAG
->getMachineNode(MachineNode
->getMachineOpcode(),
6595 MachineNode
->getValueType(0),
6596 MachineNode
->getOperand(0).
6598 MachineNode
->getOperand(2),
6599 MachineNode
->getOperand(1));
// Conditional branches: a BC is rebuilt as BCn (the alternative for other
// opcodes is not visible in this listing).
6604 ResNode
= CurDAG
->getMachineNode(Opcode
== PPC::BC
? PPC::BCn
:
6608 MachineNode
->getOperand(0).
6610 MachineNode
->getOperand(1),
6611 MachineNode
->getOperand(2));
6612 // FIXME: Handle Op1Set, Op1Unset here too.
6616 // If we're inverting this node because it is used only by selects that
6617 // we'd like to swap, then swap the selects before the node replacement.
6619 SwapAllSelectUsers(MachineNode
);
// A ResNode different from MachineNode means a simplification was found: log
// it and redirect every use of the old node.
6621 if (ResNode
!= MachineNode
) {
6622 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6623 LLVM_DEBUG(MachineNode
->dump(CurDAG
));
6624 LLVM_DEBUG(dbgs() << "\nNew: ");
6625 LLVM_DEBUG(ResNode
->dump(CurDAG
));
6626 LLVM_DEBUG(dbgs() << "\n");
6628 ReplaceUses(MachineNode
, ResNode
);
6633 CurDAG
->RemoveDeadNodes();
6634 } while (IsModified
);
6637 // Gather the set of 32-bit operations that are known to have their
6638 // higher-order 32 bits zero, where ToPromote contains all such operations.
6639 static bool PeepholePPC64ZExtGather(SDValue Op32
,
6640 SmallPtrSetImpl
<SDNode
*> &ToPromote
) {
6641 if (!Op32
.isMachineOpcode())
6644 // First, check for the "frontier" instructions (those that will clear the
6645 // higher-order 32 bits).
6647 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
6648 // around. If it does not, then these instructions will clear the
6649 // higher-order bits.
6650 if ((Op32
.getMachineOpcode() == PPC::RLWINM
||
6651 Op32
.getMachineOpcode() == PPC::RLWNM
) &&
6652 Op32
.getConstantOperandVal(2) <= Op32
.getConstantOperandVal(3)) {
6653 ToPromote
.insert(Op32
.getNode());
6657 // SLW and SRW always clear the higher-order bits.
6658 if (Op32
.getMachineOpcode() == PPC::SLW
||
6659 Op32
.getMachineOpcode() == PPC::SRW
) {
6660 ToPromote
.insert(Op32
.getNode());
6664 // For LI and LIS, we need the immediate to be positive (so that it is not
6666 if (Op32
.getMachineOpcode() == PPC::LI
||
6667 Op32
.getMachineOpcode() == PPC::LIS
) {
6668 if (!isUInt
<15>(Op32
.getConstantOperandVal(0)))
6671 ToPromote
.insert(Op32
.getNode());
6675 // LHBRX and LWBRX always clear the higher-order bits.
6676 if (Op32
.getMachineOpcode() == PPC::LHBRX
||
6677 Op32
.getMachineOpcode() == PPC::LWBRX
) {
6678 ToPromote
.insert(Op32
.getNode());
6682 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
6683 if (Op32
.getMachineOpcode() == PPC::CNTLZW
||
6684 Op32
.getMachineOpcode() == PPC::CNTTZW
) {
6685 ToPromote
.insert(Op32
.getNode());
6689 // Next, check for those instructions we can look through.
6691 // Assuming the mask does not wrap around, then the higher-order bits are
6692 // taken directly from the first operand.
6693 if (Op32
.getMachineOpcode() == PPC::RLWIMI
&&
6694 Op32
.getConstantOperandVal(3) <= Op32
.getConstantOperandVal(4)) {
6695 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6696 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
6699 ToPromote
.insert(Op32
.getNode());
6700 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6704 // For OR, the higher-order bits are zero if that is true for both operands.
6705 // For SELECT_I4, the same is true (but the relevant operand numbers are
6707 if (Op32
.getMachineOpcode() == PPC::OR
||
6708 Op32
.getMachineOpcode() == PPC::SELECT_I4
) {
6709 unsigned B
= Op32
.getMachineOpcode() == PPC::SELECT_I4
? 1 : 0;
6710 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6711 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+0), ToPromote1
))
6713 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+1), ToPromote1
))
6716 ToPromote
.insert(Op32
.getNode());
6717 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6721 // For ORI and ORIS, we need the higher-order bits of the first operand to be
6722 // zero, and also for the constant to be positive (so that it is not sign
6724 if (Op32
.getMachineOpcode() == PPC::ORI
||
6725 Op32
.getMachineOpcode() == PPC::ORIS
) {
6726 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6727 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
6729 if (!isUInt
<15>(Op32
.getConstantOperandVal(1)))
6732 ToPromote
.insert(Op32
.getNode());
6733 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6737 // The higher-order bits of AND are zero if that is true for at least one of
6739 if (Op32
.getMachineOpcode() == PPC::AND
) {
6740 SmallPtrSet
<SDNode
*, 16> ToPromote1
, ToPromote2
;
6742 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
6744 PeepholePPC64ZExtGather(Op32
.getOperand(1), ToPromote2
);
6745 if (!Op0OK
&& !Op1OK
)
6748 ToPromote
.insert(Op32
.getNode());
6751 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6754 ToPromote
.insert(ToPromote2
.begin(), ToPromote2
.end());
6759 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
6760 // of the first operand, or if the second operand is positive (so that it is
6761 // not sign extended).
6762 if (Op32
.getMachineOpcode() == PPC::ANDI_rec
||
6763 Op32
.getMachineOpcode() == PPC::ANDIS_rec
) {
6764 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
6766 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
6767 bool Op1OK
= isUInt
<15>(Op32
.getConstantOperandVal(1));
6768 if (!Op0OK
&& !Op1OK
)
6771 ToPromote
.insert(Op32
.getNode());
6774 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
6782 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
6783 if (!Subtarget
->isPPC64())
6786 // When we zero-extend from i32 to i64, we use a pattern like this:
6787 // def : Pat<(i64 (zext i32:$in)),
6788 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
6790 // There are several 32-bit shift/rotate instructions, however, that will
6791 // clear the higher-order bits of their output, rendering the RLDICL
6792 // unnecessary. When that happens, we remove it here, and redefine the
6793 // relevant 32-bit operation to be a 64-bit operation.
6795 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
6797 bool MadeChange
= false;
6798 while (Position
!= CurDAG
->allnodes_begin()) {
6799 SDNode
*N
= &*--Position
;
6800 // Skip dead nodes and any non-machine opcodes.
6801 if (N
->use_empty() || !N
->isMachineOpcode())
6804 if (N
->getMachineOpcode() != PPC::RLDICL
)
6807 if (N
->getConstantOperandVal(1) != 0 ||
6808 N
->getConstantOperandVal(2) != 32)
6811 SDValue ISR
= N
->getOperand(0);
6812 if (!ISR
.isMachineOpcode() ||
6813 ISR
.getMachineOpcode() != TargetOpcode::INSERT_SUBREG
)
6816 if (!ISR
.hasOneUse())
6819 if (ISR
.getConstantOperandVal(2) != PPC::sub_32
)
6822 SDValue IDef
= ISR
.getOperand(0);
6823 if (!IDef
.isMachineOpcode() ||
6824 IDef
.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF
)
6827 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
6828 // can get rid of it.
6830 SDValue Op32
= ISR
->getOperand(1);
6831 if (!Op32
.isMachineOpcode())
6834 // There are some 32-bit instructions that always clear the high-order 32
6835 // bits, there are also some instructions (like AND) that we can look
6837 SmallPtrSet
<SDNode
*, 16> ToPromote
;
6838 if (!PeepholePPC64ZExtGather(Op32
, ToPromote
))
6841 // If the ToPromote set contains nodes that have uses outside of the set
6842 // (except for the original INSERT_SUBREG), then abort the transformation.
6843 bool OutsideUse
= false;
6844 for (SDNode
*PN
: ToPromote
) {
6845 for (SDNode
*UN
: PN
->uses()) {
6846 if (!ToPromote
.count(UN
) && UN
!= ISR
.getNode()) {
6860 // We now know that this zero extension can be removed by promoting to
6861 // nodes in ToPromote to 64-bit operations, where for operations in the
6862 // frontier of the set, we need to insert INSERT_SUBREGs for their
6864 for (SDNode
*PN
: ToPromote
) {
6866 switch (PN
->getMachineOpcode()) {
6868 llvm_unreachable("Don't know the 64-bit variant of this instruction");
6869 case PPC::RLWINM
: NewOpcode
= PPC::RLWINM8
; break;
6870 case PPC::RLWNM
: NewOpcode
= PPC::RLWNM8
; break;
6871 case PPC::SLW
: NewOpcode
= PPC::SLW8
; break;
6872 case PPC::SRW
: NewOpcode
= PPC::SRW8
; break;
6873 case PPC::LI
: NewOpcode
= PPC::LI8
; break;
6874 case PPC::LIS
: NewOpcode
= PPC::LIS8
; break;
6875 case PPC::LHBRX
: NewOpcode
= PPC::LHBRX8
; break;
6876 case PPC::LWBRX
: NewOpcode
= PPC::LWBRX8
; break;
6877 case PPC::CNTLZW
: NewOpcode
= PPC::CNTLZW8
; break;
6878 case PPC::CNTTZW
: NewOpcode
= PPC::CNTTZW8
; break;
6879 case PPC::RLWIMI
: NewOpcode
= PPC::RLWIMI8
; break;
6880 case PPC::OR
: NewOpcode
= PPC::OR8
; break;
6881 case PPC::SELECT_I4
: NewOpcode
= PPC::SELECT_I8
; break;
6882 case PPC::ORI
: NewOpcode
= PPC::ORI8
; break;
6883 case PPC::ORIS
: NewOpcode
= PPC::ORIS8
; break;
6884 case PPC::AND
: NewOpcode
= PPC::AND8
; break;
6886 NewOpcode
= PPC::ANDI8_rec
;
6888 case PPC::ANDIS_rec
:
6889 NewOpcode
= PPC::ANDIS8_rec
;
6893 // Note: During the replacement process, the nodes will be in an
6894 // inconsistent state (some instructions will have operands with values
6895 // of the wrong type). Once done, however, everything should be right
6898 SmallVector
<SDValue
, 4> Ops
;
6899 for (const SDValue
&V
: PN
->ops()) {
6900 if (!ToPromote
.count(V
.getNode()) && V
.getValueType() == MVT::i32
&&
6901 !isa
<ConstantSDNode
>(V
)) {
6902 SDValue ReplOpOps
[] = { ISR
.getOperand(0), V
, ISR
.getOperand(2) };
6904 CurDAG
->getMachineNode(TargetOpcode::INSERT_SUBREG
, SDLoc(V
),
6905 ISR
.getNode()->getVTList(), ReplOpOps
);
6906 Ops
.push_back(SDValue(ReplOp
, 0));
6912 // Because all to-be-promoted nodes only have users that are other
6913 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
6914 // the i32 result value type with i64.
6916 SmallVector
<EVT
, 2> NewVTs
;
6917 SDVTList VTs
= PN
->getVTList();
6918 for (unsigned i
= 0, ie
= VTs
.NumVTs
; i
!= ie
; ++i
)
6919 if (VTs
.VTs
[i
] == MVT::i32
)
6920 NewVTs
.push_back(MVT::i64
);
6922 NewVTs
.push_back(VTs
.VTs
[i
]);
6924 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
6925 LLVM_DEBUG(PN
->dump(CurDAG
));
6927 CurDAG
->SelectNodeTo(PN
, NewOpcode
, CurDAG
->getVTList(NewVTs
), Ops
);
6929 LLVM_DEBUG(dbgs() << "\nNew: ");
6930 LLVM_DEBUG(PN
->dump(CurDAG
));
6931 LLVM_DEBUG(dbgs() << "\n");
6934 // Now we replace the original zero extend and its associated INSERT_SUBREG
6935 // with the value feeding the INSERT_SUBREG (which has now been promoted to
6938 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
6939 LLVM_DEBUG(N
->dump(CurDAG
));
6940 LLVM_DEBUG(dbgs() << "\nNew: ");
6941 LLVM_DEBUG(Op32
.getNode()->dump(CurDAG
));
6942 LLVM_DEBUG(dbgs() << "\n");
6944 ReplaceUses(N
, Op32
.getNode());
6948 CurDAG
->RemoveDeadNodes();
6951 static bool isVSXSwap(SDValue N
) {
6952 if (!N
->isMachineOpcode())
6954 unsigned Opc
= N
->getMachineOpcode();
6956 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
6958 if (Opc
== PPC::XXPERMDIs
) {
6959 return isa
<ConstantSDNode
>(N
->getOperand(1)) &&
6960 N
->getConstantOperandVal(1) == 2;
6961 } else if (Opc
== PPC::XXPERMDI
|| Opc
== PPC::XXSLDWI
) {
6962 return N
->getOperand(0) == N
->getOperand(1) &&
6963 isa
<ConstantSDNode
>(N
->getOperand(2)) &&
6964 N
->getConstantOperandVal(2) == 2;
// Predicate over a selected node. reduceVSXSwap calls it on the vector
// operation sitting under an xxswap to decide whether that operation is
// "lane-insensitive". Values that are not machine nodes are tested first.
// NOTE(review): every line after the opcode fetch below (listing lines
// 6975-7017, including the early-return statement and the closing brace) is
// absent from this copy of the file, so the set of opcodes accepted cannot be
// documented from here -- restore the body from the upstream source.
6970 // TODO: Make this complete and replace with a table-gen bit.
6971 static bool isLaneInsensitive(SDValue N
) {
6972 if (!N
->isMachineOpcode())
6974 unsigned Opc
= N
->getMachineOpcode();
7018 // Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7019 // lane-insensitive.
7020 static void reduceVSXSwap(SDNode
*N
, SelectionDAG
*DAG
) {
7021 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7022 // TODO: Can we put this a common method for DAG?
7023 auto SkipRCCopy
= [](SDValue V
) {
7024 while (V
->isMachineOpcode() &&
7025 V
->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS
) {
7026 // All values in the chain should have single use.
7027 if (V
->use_empty() || !V
->use_begin()->isOnlyUserOf(V
.getNode()))
7029 V
= V
->getOperand(0);
7031 return V
.hasOneUse() ? V
: SDValue();
7034 SDValue VecOp
= SkipRCCopy(N
->getOperand(0));
7035 if (!VecOp
|| !isLaneInsensitive(VecOp
))
7038 SDValue LHS
= SkipRCCopy(VecOp
.getOperand(0)),
7039 RHS
= SkipRCCopy(VecOp
.getOperand(1));
7040 if (!LHS
|| !RHS
|| !isVSXSwap(LHS
) || !isVSXSwap(RHS
))
7043 // These swaps may still have chain-uses here, count on dead code elimination
7044 // in following passes to remove them.
7045 DAG
->ReplaceAllUsesOfValueWith(LHS
, LHS
.getOperand(0));
7046 DAG
->ReplaceAllUsesOfValueWith(RHS
, RHS
.getOperand(0));
7047 DAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), N
->getOperand(0));
// Peephole over the selected DAG, walking the node list from the end towards
// the beginning. For every live machine node it (1) hands VSX swaps to
// reduceVSXSwap and (2) for the load/store opcodes matched by the switch
// below, tries to fold a feeding add-immediate into the memory operation's
// displacement, updating relocation flags on the immediate where needed.
// NOTE(review): this copy of the function has lost every line that held only
// keywords or punctuation (continue/break, default:, closing braces), most of
// the case labels of both switches, and the declarations of FirstOp and
// Flags. The comments added here describe only what is still visible.
7050 void PPCDAGToDAGISel::PeepholePPC64() {
7051 SelectionDAG::allnodes_iterator Position
= CurDAG
->allnodes_end();
7053 while (Position
!= CurDAG
->allnodes_begin()) {
7054 SDNode
*N
= &*--Position
;
7055 // Skip dead nodes and any non-machine opcodes.
7056 if (N
->use_empty() || !N
->isMachineOpcode())
// VSX swap nodes get the swap-reduction peephole first.
7059 if (isVSXSwap(SDValue(N
, 0)))
7060 reduceVSXSwap(N
, CurDAG
);
// RequiresMod4Offset is set for the DFLOAD*/DFSTORE* opcodes listed below and
// is consulted later to reject displacements that are not a multiple of 4.
// NOTE(review): the remaining case labels and the assignments to FirstOp are
// missing from this copy; the "// Store" remark further down suggests
// FirstOp == 1 marks a store -- confirm against upstream.
7063 unsigned StorageOpcode
= N
->getMachineOpcode();
7064 bool RequiresMod4Offset
= false;
7066 switch (StorageOpcode
) {
7071 case PPC::DFLOADf64
:
7072 case PPC::DFLOADf32
:
7073 RequiresMod4Offset
= true;
7089 case PPC::DFSTOREf64
:
7090 case PPC::DFSTOREf32
:
7091 RequiresMod4Offset
= true;
7105 // If this is a load or store with a zero offset, or within the alignment,
7106 // we may be able to fold an add-immediate into the memory operation.
7107 // The check against alignment is below, as it can't occur until we check
7108 // the arguments to N
7109 if (!isa
<ConstantSDNode
>(N
->getOperand(FirstOp
)))
// Operand FirstOp is the displacement and FirstOp + 1 the base; the base must
// itself be a machine node to be a folding candidate.
7112 SDValue Base
= N
->getOperand(FirstOp
+ 1);
7113 if (!Base
.isMachineOpcode())
7117 bool ReplaceFlags
= true;
7119 // When the feeding operation is an add-immediate of some sort,
7120 // determine whether we need to add relocation information to the
7121 // target flags on the immediate operand when we fold it into the
7122 // load instruction.
7124 // For something like ADDItocL, the relocation information is
7125 // inferred from the opcode; when we process it in the AsmPrinter,
7126 // we add the necessary relocation there. A load, though, can receive
7127 // relocation from various flavors of ADDIxxx, so we need to carry
7128 // the relocation information in the target flags.
7129 switch (Base
.getMachineOpcode()) {
7134 // In some cases (such as TLS) the relocation information
7135 // is already in place on the operand, so copying the operand
7137 ReplaceFlags
= false;
7138 // For these cases, the immediate may not be divisible by 4, in
7139 // which case the fold is illegal for DS-form instructions. (The
7140 // other cases provide aligned addresses and are always safe.)
7141 if (RequiresMod4Offset
&&
7142 (!isa
<ConstantSDNode
>(Base
.getOperand(1)) ||
7143 Base
.getConstantOperandVal(1) % 4 != 0))
7146 case PPC::ADDIdtprelL
:
7147 Flags
= PPCII::MO_DTPREL_LO
;
7149 case PPC::ADDItlsldL
:
7150 Flags
= PPCII::MO_TLSLD_LO
;
7153 Flags
= PPCII::MO_TOC_LO
;
7157 SDValue ImmOpnd
= Base
.getOperand(1);
7159 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7160 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7161 // we might have needed different @ha relocation values for the offset
7163 int MaxDisplacement
= 7;
// For a global-address immediate, the usable displacement is further capped at
// (pointer alignment of the global - 1).
7164 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
7165 const GlobalValue
*GV
= GA
->getGlobal();
7166 Align Alignment
= GV
->getPointerAlignment(CurDAG
->getDataLayout());
7167 MaxDisplacement
= std::min((int)Alignment
.value() - 1, MaxDisplacement
);
// HBase is the add-immediate's own first operand; it is only rewritten when
// the out-of-range path below also matches an ADDIStocHA8 feeding it.
7170 bool UpdateHBase
= false;
7171 SDValue HBase
= Base
.getOperand(0);
7173 int Offset
= N
->getConstantOperandVal(FirstOp
);
7175 if (Offset
< 0 || Offset
> MaxDisplacement
) {
7176 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
7177 // one use, then we can do this for any offset, we just need to also
7178 // update the offset (i.e. the symbol addend) on the addis also.
7179 if (Base
.getMachineOpcode() != PPC::ADDItocL
)
7182 if (!HBase
.isMachineOpcode() ||
7183 HBase
.getMachineOpcode() != PPC::ADDIStocHA8
)
7186 if (!Base
.hasOneUse() || !HBase
.hasOneUse())
7189 SDValue HImmOpnd
= HBase
.getOperand(1);
7190 if (HImmOpnd
!= ImmOpnd
)
7196 // If we're directly folding the addend from an addi instruction, then:
7197 // 1. In general, the offset on the memory access must be zero.
7198 // 2. If the addend is a constant, then it can be combined with a
7199 // non-zero offset, but only if the result meets the encoding
7201 if (auto *C
= dyn_cast
<ConstantSDNode
>(ImmOpnd
)) {
7202 Offset
+= C
->getSExtValue();
7204 if (RequiresMod4Offset
&& (Offset
% 4) != 0)
7207 if (!isInt
<16>(Offset
))
7210 ImmOpnd
= CurDAG
->getTargetConstant(Offset
, SDLoc(ImmOpnd
),
7211 ImmOpnd
.getValueType());
7212 } else if (Offset
!= 0) {
7217 // We found an opportunity. Reverse the operands from the add
7218 // immediate and substitute them into the load or store. If
7219 // needed, update the target flags for the immediate operand to
7220 // reflect the necessary relocation information.
7221 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7222 LLVM_DEBUG(Base
->dump(CurDAG
));
7223 LLVM_DEBUG(dbgs() << "\nN: ");
7224 LLVM_DEBUG(N
->dump(CurDAG
));
7225 LLVM_DEBUG(dbgs() << "\n");
7227 // If the relocation information isn't already present on the
7228 // immediate operand, add it now.
7230 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
7232 const GlobalValue
*GV
= GA
->getGlobal();
7233 Align Alignment
= GV
->getPointerAlignment(CurDAG
->getDataLayout());
7234 // We can't perform this optimization for data whose alignment
7235 // is insufficient for the instruction encoding.
7236 if (Alignment
< 4 && (RequiresMod4Offset
|| (Offset
% 4) != 0)) {
7237 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
// The immediate is rebuilt as a target global address (or, below, a target
// constant-pool entry) carrying the combined Offset and the relocation Flags.
7240 ImmOpnd
= CurDAG
->getTargetGlobalAddress(GV
, dl
, MVT::i64
, Offset
, Flags
);
7241 } else if (ConstantPoolSDNode
*CP
=
7242 dyn_cast
<ConstantPoolSDNode
>(ImmOpnd
)) {
7243 const Constant
*C
= CP
->getConstVal();
7244 ImmOpnd
= CurDAG
->getTargetConstantPool(C
, MVT::i64
, CP
->getAlign(),
// Rewrite the memory node in place: the new immediate becomes the displacement
// and the add-immediate's first operand becomes the base.
7249 if (FirstOp
== 1) // Store
7250 (void)CurDAG
->UpdateNodeOperands(N
, N
->getOperand(0), ImmOpnd
,
7251 Base
.getOperand(0), N
->getOperand(3));
7253 (void)CurDAG
->UpdateNodeOperands(N
, ImmOpnd
, Base
.getOperand(0),
7257 (void)CurDAG
->UpdateNodeOperands(HBase
.getNode(), HBase
.getOperand(0),
7260 // The add-immediate may now be dead, in which case remove it.
7261 if (Base
.getNode()->use_empty())
7262 CurDAG
->RemoveDeadNode(Base
.getNode());
7266 /// createPPCISelDag - This pass converts a legalized DAG into a
7267 /// PowerPC-specific DAG, ready for instruction scheduling.
7269 FunctionPass
*llvm::createPPCISelDag(PPCTargetMachine
&TM
,
7270 CodeGenOpt::Level OptLevel
) {
7271 return new PPCDAGToDAGISel(TM
, OptLevel
);