1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
13 //===----------------------------------------------------------------------===//
15 #include "MCTargetDesc/PPCMCTargetDesc.h"
16 #include "MCTargetDesc/PPCPredicates.h"
18 #include "PPCISelLowering.h"
19 #include "PPCMachineFunctionInfo.h"
20 #include "PPCSubtarget.h"
21 #include "PPCTargetMachine.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/BranchProbabilityInfo.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/MachineValueType.h"
36 #include "llvm/CodeGen/SelectionDAG.h"
37 #include "llvm/CodeGen/SelectionDAGISel.h"
38 #include "llvm/CodeGen/SelectionDAGNodes.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalValue.h"
44 #include "llvm/IR/InlineAsm.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/CodeGen.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/ErrorHandling.h"
53 #include "llvm/Support/KnownBits.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Target/TargetInstrInfo.h"
57 #include "llvm/Target/TargetRegisterInfo.h"
70 #define DEBUG_TYPE "ppc-codegen"
72 STATISTIC(NumSextSetcc
,
73 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
74 STATISTIC(NumZextSetcc
,
75 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
76 STATISTIC(SignExtensionsAdded
,
77 "Number of sign extensions for compare inputs added.");
78 STATISTIC(NumLogicOpsOnComparison
,
79 "Number of logical ops on i1 values calculated in GPR.");
80 STATISTIC(OmittedForNonExtendUses
,
81 "Number of compares not eliminated as they have non-extending uses.");
83 // FIXME: Remove this once the bug has been fixed!
84 cl::opt
<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
85 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden
);
88 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
89 cl::desc("use aggressive ppc isel for bit permutations"),
91 static cl::opt
<bool> BPermRewriterNoMasking(
92 "ppc-bit-perm-rewriter-stress-rotates",
93 cl::desc("stress rotate selection in aggressive ppc isel for "
97 static cl::opt
<bool> EnableBranchHint(
98 "ppc-use-branch-hint", cl::init(true),
99 cl::desc("Enable static hinting of branches on ppc"),
104 //===--------------------------------------------------------------------===//
105 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
106 /// instructions for SelectionDAG operations.
108 class PPCDAGToDAGISel
: public SelectionDAGISel
{
109 const PPCTargetMachine
&TM
;
110 const PPCSubtarget
*PPCSubTarget
;
111 const PPCTargetLowering
*PPCLowering
;
112 unsigned GlobalBaseReg
;
115 explicit PPCDAGToDAGISel(PPCTargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
116 : SelectionDAGISel(tm
, OptLevel
), TM(tm
) {}
118 bool runOnMachineFunction(MachineFunction
&MF
) override
{
119 // Make sure we re-emit a set of the global base reg if necessary
121 PPCSubTarget
= &MF
.getSubtarget
<PPCSubtarget
>();
122 PPCLowering
= PPCSubTarget
->getTargetLowering();
123 SelectionDAGISel::runOnMachineFunction(MF
);
125 if (!PPCSubTarget
->isSVR4ABI())
126 InsertVRSaveCode(MF
);
131 void PreprocessISelDAG() override
;
132 void PostprocessISelDAG() override
;
134 /// getI16Imm - Return a target constant with the specified value, of type
136 inline SDValue
getI16Imm(unsigned Imm
, const SDLoc
&dl
) {
137 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i16
);
140 /// getI32Imm - Return a target constant with the specified value, of type
142 inline SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
143 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
146 /// getI64Imm - Return a target constant with the specified value, of type
148 inline SDValue
getI64Imm(uint64_t Imm
, const SDLoc
&dl
) {
149 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i64
);
152 /// getSmallIPtrImm - Return a target constant of pointer type.
153 inline SDValue
getSmallIPtrImm(unsigned Imm
, const SDLoc
&dl
) {
154 return CurDAG
->getTargetConstant(
155 Imm
, dl
, PPCLowering
->getPointerTy(CurDAG
->getDataLayout()));
158 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
159 /// rotate and mask opcode and mask operation.
160 static bool isRotateAndMask(SDNode
*N
, unsigned Mask
, bool isShiftMask
,
161 unsigned &SH
, unsigned &MB
, unsigned &ME
);
163 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
164 /// base register. Return the virtual register that holds this value.
165 SDNode
*getGlobalBaseReg();
167 void selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
= 0);
169 // Select - Convert the specified operand from a target-independent to a
170 // target-specific node if it hasn't already been changed.
171 void Select(SDNode
*N
) override
;
173 bool tryBitfieldInsert(SDNode
*N
);
174 bool tryBitPermutation(SDNode
*N
);
176 /// SelectCC - Select a comparison of the specified values with the
177 /// specified condition code, returning the CR# of the expression.
178 SDValue
SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
181 /// SelectAddrImm - Returns true if the address N can be represented by
182 /// a base register plus a signed 16-bit displacement [r+imm].
183 bool SelectAddrImm(SDValue N
, SDValue
&Disp
,
185 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 0);
188 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
189 /// immediate field. Note that the operand at this point is already the
190 /// result of a prior SelectAddressRegImm call.
191 bool SelectAddrImmOffs(SDValue N
, SDValue
&Out
) const {
192 if (N
.getOpcode() == ISD::TargetConstant
||
193 N
.getOpcode() == ISD::TargetGlobalAddress
) {
201 /// SelectAddrIdx - Given the specified addressed, check to see if it can be
202 /// represented as an indexed [r+r] operation. Returns false if it can
203 /// be represented by [r+imm], which are preferred.
204 bool SelectAddrIdx(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
205 return PPCLowering
->SelectAddressRegReg(N
, Base
, Index
, *CurDAG
);
208 /// SelectAddrIdxOnly - Given the specified addressed, force it to be
209 /// represented as an indexed [r+r] operation.
210 bool SelectAddrIdxOnly(SDValue N
, SDValue
&Base
, SDValue
&Index
) {
211 return PPCLowering
->SelectAddressRegRegOnly(N
, Base
, Index
, *CurDAG
);
214 /// SelectAddrImmX4 - Returns true if the address N can be represented by
215 /// a base register plus a signed 16-bit displacement that is a multiple of 4.
216 /// Suitable for use by STD and friends.
217 bool SelectAddrImmX4(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
218 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 4);
221 bool SelectAddrImmX16(SDValue N
, SDValue
&Disp
, SDValue
&Base
) {
222 return PPCLowering
->SelectAddressRegImm(N
, Disp
, Base
, *CurDAG
, 16);
225 // Select an address into a single register.
226 bool SelectAddr(SDValue N
, SDValue
&Base
) {
231 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
232 /// inline asm expressions. It is always correct to compute the value into
233 /// a register. The case of adding a (possibly relocatable) constant to a
234 /// register can be improved, but it is wrong to substitute Reg+Reg for
235 /// Reg in an asm, because the load or store opcode would have to change.
236 bool SelectInlineAsmMemoryOperand(const SDValue
&Op
,
237 unsigned ConstraintID
,
238 std::vector
<SDValue
> &OutOps
) override
{
239 switch(ConstraintID
) {
241 errs() << "ConstraintID: " << ConstraintID
<< "\n";
242 llvm_unreachable("Unexpected asm memory constraint");
243 case InlineAsm::Constraint_es
:
244 case InlineAsm::Constraint_i
:
245 case InlineAsm::Constraint_m
:
246 case InlineAsm::Constraint_o
:
247 case InlineAsm::Constraint_Q
:
248 case InlineAsm::Constraint_Z
:
249 case InlineAsm::Constraint_Zy
:
250 // We need to make sure that this one operand does not end up in r0
251 // (because we might end up lowering this as 0(%op)).
252 const TargetRegisterInfo
*TRI
= PPCSubTarget
->getRegisterInfo();
253 const TargetRegisterClass
*TRC
= TRI
->getPointerRegClass(*MF
, /*Kind=*/1);
255 SDValue RC
= CurDAG
->getTargetConstant(TRC
->getID(), dl
, MVT::i32
);
257 SDValue(CurDAG
->getMachineNode(TargetOpcode::COPY_TO_REGCLASS
,
258 dl
, Op
.getValueType(),
261 OutOps
.push_back(NewOp
);
267 void InsertVRSaveCode(MachineFunction
&MF
);
269 StringRef
getPassName() const override
{
270 return "PowerPC DAG->DAG Pattern Instruction Selection";
273 // Include the pieces autogenerated from the target description.
274 #include "PPCGenDAGISel.inc"
// Kind of conversion to apply when the result of a 32-bit instruction is
// interpreted as a 64-bit value, or vice versa.
enum ExtOrTruncConversion {
  Ext,
  Trunc
};
// Modifiers that guide how the result of an ISD::SETCC node is to be
// computed: which condition code to use and how to extend the value.
enum SetccInGPROpts {
  ZExtOrig,   // Use the original condition code, zero-extend value.
  ZExtInvert, // Invert the condition code, zero-extend value.
  SExtOrig,   // Use the original condition code, sign-extend value.
  SExtInvert  // Invert the condition code, sign-extend value.
};
// Comparisons against zero for which GPR code sequences are emitted. One
// sequence may serve two or more equivalent patterns, for example
// (a >= 0) == (a > -1). Both the direction of the comparison (</>) and the
// extension type matter: sext yields -1/0, zext yields 1/0.
enum ZeroCompare {
  GEZExt, // (zext (LHS >= 0))
  GESExt, // (sext (LHS >= 0))
  LEZExt, // (zext (LHS <= 0))
  LESExt  // (sext (LHS <= 0))
};
299 bool trySETCC(SDNode
*N
);
300 bool tryEXTEND(SDNode
*N
);
301 bool tryLogicOpOfCompares(SDNode
*N
);
302 SDValue
computeLogicOpInGPR(SDValue LogicOp
);
303 SDValue
signExtendInputIfNeeded(SDValue Input
);
304 SDValue
addExtOrTrunc(SDValue NatWidthRes
, ExtOrTruncConversion Conv
);
305 SDValue
getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
307 SDValue
get32BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
308 int64_t RHSValue
, SDLoc dl
);
309 SDValue
get32BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
310 int64_t RHSValue
, SDLoc dl
);
311 SDValue
get64BitZExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
312 int64_t RHSValue
, SDLoc dl
);
313 SDValue
get64BitSExtCompare(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
314 int64_t RHSValue
, SDLoc dl
);
315 SDValue
getSETCCInGPR(SDValue Compare
, SetccInGPROpts ConvOpts
);
317 void PeepholePPC64();
318 void PeepholePPC64ZExt();
319 void PeepholeCROps();
321 SDValue
combineToCMPB(SDNode
*N
);
322 void foldBoolExts(SDValue
&Res
, SDNode
*&N
);
324 bool AllUsersSelectZero(SDNode
*N
);
325 void SwapAllSelectUsers(SDNode
*N
);
327 bool isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const;
328 void transferMemOperands(SDNode
*N
, SDNode
*Result
);
331 } // end anonymous namespace
333 /// InsertVRSaveCode - Once the entire function has been instruction selected,
334 /// all virtual registers are created and all machine instructions are built,
335 /// check to see if we need to save/restore VRSAVE. If so, do it.
336 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction
&Fn
) {
337 // Check to see if this function uses vector registers, which means we have to
338 // save and restore the VRSAVE register and update it with the regs we use.
340 // In this case, there will be virtual registers of vector type created
341 // by the scheduler. Detect them now.
342 bool HasVectorVReg
= false;
343 for (unsigned i
= 0, e
= RegInfo
->getNumVirtRegs(); i
!= e
; ++i
) {
344 unsigned Reg
= TargetRegisterInfo::index2VirtReg(i
);
345 if (RegInfo
->getRegClass(Reg
) == &PPC::VRRCRegClass
) {
346 HasVectorVReg
= true;
350 if (!HasVectorVReg
) return; // nothing to do.
352 // If we have a vector register, we want to emit code into the entry and exit
353 // blocks to save and restore the VRSAVE register. We do this here (instead
354 // of marking all vector instructions as clobbering VRSAVE) for two reasons:
356 // 1. This (trivially) reduces the load on the register allocator, by not
357 // having to represent the live range of the VRSAVE register.
358 // 2. This (more significantly) allows us to create a temporary virtual
359 // register to hold the saved VRSAVE value, allowing this temporary to be
360 // register allocated, instead of forcing it to be spilled to the stack.
362 // Create two vregs - one to hold the VRSAVE register that is live-in to the
363 // function and one for the value after having bits or'd into it.
364 unsigned InVRSAVE
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
365 unsigned UpdatedVRSAVE
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
367 const TargetInstrInfo
&TII
= *PPCSubTarget
->getInstrInfo();
368 MachineBasicBlock
&EntryBB
= *Fn
.begin();
370 // Emit the following code into the entry block:
371 // InVRSAVE = MFVRSAVE
372 // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
373 // MTVRSAVE UpdatedVRSAVE
374 MachineBasicBlock::iterator IP
= EntryBB
.begin(); // Insert Point
375 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::MFVRSAVE
), InVRSAVE
);
376 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::UPDATE_VRSAVE
),
377 UpdatedVRSAVE
).addReg(InVRSAVE
);
378 BuildMI(EntryBB
, IP
, dl
, TII
.get(PPC::MTVRSAVE
)).addReg(UpdatedVRSAVE
);
380 // Find all return blocks, outputting a restore in each epilog.
381 for (MachineFunction::iterator BB
= Fn
.begin(), E
= Fn
.end(); BB
!= E
; ++BB
) {
382 if (BB
->isReturnBlock()) {
383 IP
= BB
->end(); --IP
;
385 // Skip over all terminator instructions, which are part of the return
387 MachineBasicBlock::iterator I2
= IP
;
388 while (I2
!= BB
->begin() && (--I2
)->isTerminator())
391 // Emit: MTVRSAVE InVRSave
392 BuildMI(*BB
, IP
, dl
, TII
.get(PPC::MTVRSAVE
)).addReg(InVRSAVE
);
397 /// getGlobalBaseReg - Output the instructions required to put the
398 /// base address to use for accessing globals into a register.
400 SDNode
*PPCDAGToDAGISel::getGlobalBaseReg() {
401 if (!GlobalBaseReg
) {
402 const TargetInstrInfo
&TII
= *PPCSubTarget
->getInstrInfo();
403 // Insert the set of GlobalBaseReg into the first MBB of the function
404 MachineBasicBlock
&FirstMBB
= MF
->front();
405 MachineBasicBlock::iterator MBBI
= FirstMBB
.begin();
406 const Module
*M
= MF
->getFunction()->getParent();
409 if (PPCLowering
->getPointerTy(CurDAG
->getDataLayout()) == MVT::i32
) {
410 if (PPCSubTarget
->isTargetELF()) {
411 GlobalBaseReg
= PPC::R30
;
412 if (M
->getPICLevel() == PICLevel::SmallPIC
) {
413 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MoveGOTtoLR
));
414 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
415 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
417 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
418 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
419 unsigned TempReg
= RegInfo
->createVirtualRegister(&PPC::GPRCRegClass
);
420 BuildMI(FirstMBB
, MBBI
, dl
,
421 TII
.get(PPC::UpdateGBR
), GlobalBaseReg
)
422 .addReg(TempReg
, RegState::Define
).addReg(GlobalBaseReg
);
423 MF
->getInfo
<PPCFunctionInfo
>()->setUsesPICBase(true);
427 RegInfo
->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass
);
428 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR
));
429 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR
), GlobalBaseReg
);
432 GlobalBaseReg
= RegInfo
->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass
);
433 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MovePCtoLR8
));
434 BuildMI(FirstMBB
, MBBI
, dl
, TII
.get(PPC::MFLR8
), GlobalBaseReg
);
437 return CurDAG
->getRegister(GlobalBaseReg
,
438 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()))
442 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
443 /// operand. If so Imm will receive the 32-bit value.
444 static bool isInt32Immediate(SDNode
*N
, unsigned &Imm
) {
445 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i32
) {
446 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
452 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
453 /// operand. If so Imm will receive the 64-bit value.
454 static bool isInt64Immediate(SDNode
*N
, uint64_t &Imm
) {
455 if (N
->getOpcode() == ISD::Constant
&& N
->getValueType(0) == MVT::i64
) {
456 Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
462 // isInt32Immediate - This method tests to see if a constant operand.
463 // If so Imm will receive the 32 bit value.
464 static bool isInt32Immediate(SDValue N
, unsigned &Imm
) {
465 return isInt32Immediate(N
.getNode(), Imm
);
468 /// isInt64Immediate - This method tests to see if the value is a 64-bit
469 /// constant operand. If so Imm will receive the 64-bit value.
470 static bool isInt64Immediate(SDValue N
, uint64_t &Imm
) {
471 return isInt64Immediate(N
.getNode(), Imm
);
474 static unsigned getBranchHint(unsigned PCC
, FunctionLoweringInfo
*FuncInfo
,
475 const SDValue
&DestMBB
) {
476 assert(isa
<BasicBlockSDNode
>(DestMBB
));
478 if (!FuncInfo
->BPI
) return PPC::BR_NO_HINT
;
480 const BasicBlock
*BB
= FuncInfo
->MBB
->getBasicBlock();
481 const TerminatorInst
*BBTerm
= BB
->getTerminator();
483 if (BBTerm
->getNumSuccessors() != 2) return PPC::BR_NO_HINT
;
485 const BasicBlock
*TBB
= BBTerm
->getSuccessor(0);
486 const BasicBlock
*FBB
= BBTerm
->getSuccessor(1);
488 auto TProb
= FuncInfo
->BPI
->getEdgeProbability(BB
, TBB
);
489 auto FProb
= FuncInfo
->BPI
->getEdgeProbability(BB
, FBB
);
491 // We only want to handle cases which are easy to predict at static time, e.g.
492 // C++ throw statement, that is very likely not taken, or calling never
493 // returned function, e.g. stdlib exit(). So we set Threshold to filter
496 // Below is LLVM branch weight table, we only want to handle case 1, 2
498 // Case Taken:Nontaken Example
499 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
500 // 2. Invoke-terminating 1:1048575
501 // 3. Coldblock 4:64 __builtin_expect
502 // 4. Loop Branch 124:4 For loop
503 // 5. PH/ZH/FPH 20:12
504 const uint32_t Threshold
= 10000;
506 if (std::max(TProb
, FProb
) / Threshold
< std::min(TProb
, FProb
))
507 return PPC::BR_NO_HINT
;
509 DEBUG(dbgs() << "Use branch hint for '" << FuncInfo
->Fn
->getName() << "::"
510 << BB
->getName() << "'\n"
511 << " -> " << TBB
->getName() << ": " << TProb
<< "\n"
512 << " -> " << FBB
->getName() << ": " << FProb
<< "\n");
514 const BasicBlockSDNode
*BBDN
= cast
<BasicBlockSDNode
>(DestMBB
);
516 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
517 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
518 if (BBDN
->getBasicBlock()->getBasicBlock() != TBB
)
519 std::swap(TProb
, FProb
);
521 return (TProb
> FProb
) ? PPC::BR_TAKEN_HINT
: PPC::BR_NONTAKEN_HINT
;
524 // isOpcWithIntImmediate - This method tests to see if the node is a specific
525 // opcode and that it has a immediate integer right operand.
526 // If so Imm will receive the 32 bit value.
527 static bool isOpcWithIntImmediate(SDNode
*N
, unsigned Opc
, unsigned& Imm
) {
528 return N
->getOpcode() == Opc
529 && isInt32Immediate(N
->getOperand(1).getNode(), Imm
);
532 void PPCDAGToDAGISel::selectFrameIndex(SDNode
*SN
, SDNode
*N
, unsigned Offset
) {
534 int FI
= cast
<FrameIndexSDNode
>(N
)->getIndex();
535 SDValue TFI
= CurDAG
->getTargetFrameIndex(FI
, N
->getValueType(0));
536 unsigned Opc
= N
->getValueType(0) == MVT::i32
? PPC::ADDI
: PPC::ADDI8
;
538 CurDAG
->SelectNodeTo(SN
, Opc
, N
->getValueType(0), TFI
,
539 getSmallIPtrImm(Offset
, dl
));
541 ReplaceNode(SN
, CurDAG
->getMachineNode(Opc
, dl
, N
->getValueType(0), TFI
,
542 getSmallIPtrImm(Offset
, dl
)));
545 bool PPCDAGToDAGISel::isRotateAndMask(SDNode
*N
, unsigned Mask
,
546 bool isShiftMask
, unsigned &SH
,
547 unsigned &MB
, unsigned &ME
) {
548 // Don't even go down this path for i64, since different logic will be
549 // necessary for rldicl/rldicr/rldimi.
550 if (N
->getValueType(0) != MVT::i32
)
554 unsigned Indeterminant
= ~0; // bit mask marking indeterminant results
555 unsigned Opcode
= N
->getOpcode();
556 if (N
->getNumOperands() != 2 ||
557 !isInt32Immediate(N
->getOperand(1).getNode(), Shift
) || (Shift
> 31))
560 if (Opcode
== ISD::SHL
) {
561 // apply shift left to mask if it comes first
562 if (isShiftMask
) Mask
= Mask
<< Shift
;
563 // determine which bits are made indeterminant by shift
564 Indeterminant
= ~(0xFFFFFFFFu
<< Shift
);
565 } else if (Opcode
== ISD::SRL
) {
566 // apply shift right to mask if it comes first
567 if (isShiftMask
) Mask
= Mask
>> Shift
;
568 // determine which bits are made indeterminant by shift
569 Indeterminant
= ~(0xFFFFFFFFu
>> Shift
);
570 // adjust for the left rotate
572 } else if (Opcode
== ISD::ROTL
) {
578 // if the mask doesn't intersect any Indeterminant bits
579 if (Mask
&& !(Mask
& Indeterminant
)) {
581 // make sure the mask is still a mask (wrap arounds may not be)
582 return isRunOfOnes(Mask
, MB
, ME
);
587 /// Turn an or of two masked values into the rotate left word immediate then
588 /// mask insert (rlwimi) instruction.
589 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode
*N
) {
590 SDValue Op0
= N
->getOperand(0);
591 SDValue Op1
= N
->getOperand(1);
594 KnownBits LKnown
, RKnown
;
595 CurDAG
->computeKnownBits(Op0
, LKnown
);
596 CurDAG
->computeKnownBits(Op1
, RKnown
);
598 unsigned TargetMask
= LKnown
.Zero
.getZExtValue();
599 unsigned InsertMask
= RKnown
.Zero
.getZExtValue();
601 if ((TargetMask
| InsertMask
) == 0xFFFFFFFF) {
602 unsigned Op0Opc
= Op0
.getOpcode();
603 unsigned Op1Opc
= Op1
.getOpcode();
604 unsigned Value
, SH
= 0;
605 TargetMask
= ~TargetMask
;
606 InsertMask
= ~InsertMask
;
608 // If the LHS has a foldable shift and the RHS does not, then swap it to the
609 // RHS so that we can fold the shift into the insert.
610 if (Op0Opc
== ISD::AND
&& Op1Opc
== ISD::AND
) {
611 if (Op0
.getOperand(0).getOpcode() == ISD::SHL
||
612 Op0
.getOperand(0).getOpcode() == ISD::SRL
) {
613 if (Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
614 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
616 std::swap(Op0Opc
, Op1Opc
);
617 std::swap(TargetMask
, InsertMask
);
620 } else if (Op0Opc
== ISD::SHL
|| Op0Opc
== ISD::SRL
) {
621 if (Op1Opc
== ISD::AND
&& Op1
.getOperand(0).getOpcode() != ISD::SHL
&&
622 Op1
.getOperand(0).getOpcode() != ISD::SRL
) {
624 std::swap(Op0Opc
, Op1Opc
);
625 std::swap(TargetMask
, InsertMask
);
630 if (isRunOfOnes(InsertMask
, MB
, ME
)) {
633 if ((Op1Opc
== ISD::SHL
|| Op1Opc
== ISD::SRL
) &&
634 isInt32Immediate(Op1
.getOperand(1), Value
)) {
635 Op1
= Op1
.getOperand(0);
636 SH
= (Op1Opc
== ISD::SHL
) ? Value
: 32 - Value
;
638 if (Op1Opc
== ISD::AND
) {
639 // The AND mask might not be a constant, and we need to make sure that
640 // if we're going to fold the masking with the insert, all bits not
641 // known to be zero in the mask are known to be one.
643 CurDAG
->computeKnownBits(Op1
.getOperand(1), MKnown
);
644 bool CanFoldMask
= InsertMask
== MKnown
.One
.getZExtValue();
646 unsigned SHOpc
= Op1
.getOperand(0).getOpcode();
647 if ((SHOpc
== ISD::SHL
|| SHOpc
== ISD::SRL
) && CanFoldMask
&&
648 isInt32Immediate(Op1
.getOperand(0).getOperand(1), Value
)) {
649 // Note that Value must be in range here (less than 32) because
650 // otherwise there would not be any bits set in InsertMask.
651 Op1
= Op1
.getOperand(0).getOperand(0);
652 SH
= (SHOpc
== ISD::SHL
) ? Value
: 32 - Value
;
657 SDValue Ops
[] = { Op0
, Op1
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
659 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
666 // Predict the number of instructions that would be generated by calling
668 static unsigned selectI64ImmInstrCountDirect(int64_t Imm
) {
669 // Assume no remaining bits.
670 unsigned Remainder
= 0;
671 // Assume no shift required.
674 // If it can't be represented as a 32 bit value.
675 if (!isInt
<32>(Imm
)) {
676 Shift
= countTrailingZeros
<uint64_t>(Imm
);
677 int64_t ImmSh
= static_cast<uint64_t>(Imm
) >> Shift
;
679 // If the shifted value fits 32 bits.
680 if (isInt
<32>(ImmSh
)) {
681 // Go with the shifted value.
684 // Still stuck with a 64 bit value.
691 // Intermediate operand.
694 // Handle first 32 bits.
695 unsigned Lo
= Imm
& 0xFFFF;
698 if (isInt
<16>(Imm
)) {
702 // Handle the Hi bits and Lo bits.
709 // If no shift, we're done.
710 if (!Shift
) return Result
;
712 // If Hi word == Lo word,
713 // we can use rldimi to insert the Lo word into Hi word.
714 if ((unsigned)(Imm
& 0xFFFFFFFF) == Remainder
) {
719 // Shift for next step if the upper 32-bits were not zero.
723 // Add in the last bits as required.
724 if ((Remainder
>> 16) & 0xFFFF)
726 if (Remainder
& 0xFFFF)
/// Rot64 - Rotate the 64-bit value \p Imm left by \p R bit positions.
///
/// The rotate amount is reduced modulo 64, and a rotate by zero returns
/// \p Imm unchanged. Without that guard, `Imm >> (64 - R)` shifts a 64-bit
/// value by 64 bits when R == 0 (and both shifts are out of range for
/// R >= 64), which is undefined behavior in C++. Results for R in [1, 63]
/// are the same as for the unguarded expression.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
736 static unsigned selectI64ImmInstrCount(int64_t Imm
) {
737 unsigned Count
= selectI64ImmInstrCountDirect(Imm
);
739 // If the instruction count is 1 or 2, we do not need further analysis
740 // since rotate + load constant requires at least 2 instructions.
744 for (unsigned r
= 1; r
< 63; ++r
) {
745 uint64_t RImm
= Rot64(Imm
, r
);
746 unsigned RCount
= selectI64ImmInstrCountDirect(RImm
) + 1;
747 Count
= std::min(Count
, RCount
);
749 // See comments in selectI64Imm for an explanation of the logic below.
750 unsigned LS
= findLastSet(RImm
);
754 uint64_t OnesMask
= -(int64_t) (UINT64_C(1) << (LS
+1));
755 uint64_t RImmWithOnes
= RImm
| OnesMask
;
757 RCount
= selectI64ImmInstrCountDirect(RImmWithOnes
) + 1;
758 Count
= std::min(Count
, RCount
);
764 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
765 // (above) needs to be kept in sync with this function.
766 static SDNode
*selectI64ImmDirect(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
768 // Assume no remaining bits.
769 unsigned Remainder
= 0;
770 // Assume no shift required.
773 // If it can't be represented as a 32 bit value.
774 if (!isInt
<32>(Imm
)) {
775 Shift
= countTrailingZeros
<uint64_t>(Imm
);
776 int64_t ImmSh
= static_cast<uint64_t>(Imm
) >> Shift
;
778 // If the shifted value fits 32 bits.
779 if (isInt
<32>(ImmSh
)) {
780 // Go with the shifted value.
783 // Still stuck with a 64 bit value.
790 // Intermediate operand.
793 // Handle first 32 bits.
794 unsigned Lo
= Imm
& 0xFFFF;
795 unsigned Hi
= (Imm
>> 16) & 0xFFFF;
797 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
798 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
802 if (isInt
<16>(Imm
)) {
804 Result
= CurDAG
->getMachineNode(PPC::LI8
, dl
, MVT::i64
, getI32Imm(Lo
));
806 // Handle the Hi bits.
807 unsigned OpC
= Hi
? PPC::LIS8
: PPC::LI8
;
808 Result
= CurDAG
->getMachineNode(OpC
, dl
, MVT::i64
, getI32Imm(Hi
));
810 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
811 SDValue(Result
, 0), getI32Imm(Lo
));
814 Result
= CurDAG
->getMachineNode(PPC::LIS8
, dl
, MVT::i64
, getI32Imm(Hi
));
817 // If no shift, we're done.
818 if (!Shift
) return Result
;
820 // If Hi word == Lo word,
821 // we can use rldimi to insert the Lo word into Hi word.
822 if ((unsigned)(Imm
& 0xFFFFFFFF) == Remainder
) {
824 { SDValue(Result
, 0), SDValue(Result
, 0), getI32Imm(Shift
), getI32Imm(0)};
825 return CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
);
828 // Shift for next step if the upper 32-bits were not zero.
830 Result
= CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
,
833 getI32Imm(63 - Shift
));
836 // Add in the last bits as required.
837 if ((Hi
= (Remainder
>> 16) & 0xFFFF)) {
838 Result
= CurDAG
->getMachineNode(PPC::ORIS8
, dl
, MVT::i64
,
839 SDValue(Result
, 0), getI32Imm(Hi
));
841 if ((Lo
= Remainder
& 0xFFFF)) {
842 Result
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
843 SDValue(Result
, 0), getI32Imm(Lo
));
849 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, const SDLoc
&dl
,
851 unsigned Count
= selectI64ImmInstrCountDirect(Imm
);
853 // If the instruction count is 1 or 2, we do not need further analysis
854 // since rotate + load constant requires at least 2 instructions.
856 return selectI64ImmDirect(CurDAG
, dl
, Imm
);
863 for (unsigned r
= 1; r
< 63; ++r
) {
864 uint64_t RImm
= Rot64(Imm
, r
);
865 unsigned RCount
= selectI64ImmInstrCountDirect(RImm
) + 1;
866 if (RCount
< Count
) {
873 // If the immediate to generate has many trailing zeros, it might be
874 // worthwhile to generate a rotated value with too many leading ones
875 // (because that's free with li/lis's sign-extension semantics), and then
876 // mask them off after rotation.
878 unsigned LS
= findLastSet(RImm
);
879 // We're adding (63-LS) higher-order ones, and we expect to mask them off
880 // after performing the inverse rotation by (64-r). So we need that:
881 // 63-LS == 64-r => LS == r-1
885 uint64_t OnesMask
= -(int64_t) (UINT64_C(1) << (LS
+1));
886 uint64_t RImmWithOnes
= RImm
| OnesMask
;
888 RCount
= selectI64ImmInstrCountDirect(RImmWithOnes
) + 1;
889 if (RCount
< Count
) {
892 MatImm
= RImmWithOnes
;
898 return selectI64ImmDirect(CurDAG
, dl
, Imm
);
900 auto getI32Imm
= [CurDAG
, dl
](unsigned Imm
) {
901 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
904 SDValue Val
= SDValue(selectI64ImmDirect(CurDAG
, dl
, MatImm
), 0);
905 return CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Val
,
906 getI32Imm(64 - RMin
), getI32Imm(MaskEnd
));
909 // Select a 64-bit constant.
910 static SDNode
*selectI64Imm(SelectionDAG
*CurDAG
, SDNode
*N
) {
914 int64_t Imm
= cast
<ConstantSDNode
>(N
)->getZExtValue();
915 return selectI64Imm(CurDAG
, dl
, Imm
);
920 class BitPermutationSelector
{
924 // The bit number in the value, using a convention where bit 0 is the
933 ValueBit(SDValue V
, unsigned I
, Kind K
= Variable
)
934 : V(V
), Idx(I
), K(K
) {}
935 ValueBit(Kind K
= Variable
)
936 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX
), K(K
) {}
938 bool isZero() const {
939 return K
== ConstZero
;
942 bool hasValue() const {
943 return K
== Variable
;
946 SDValue
getValue() const {
947 assert(hasValue() && "Cannot get the value of a constant bit");
951 unsigned getValueBitIndex() const {
952 assert(hasValue() && "Cannot get the value bit index of a constant bit");
957 // A bit group has the same underlying value and the same rotate factor.
961 unsigned StartIdx
, EndIdx
;
963 // This rotation amount assumes that the lower 32 bits of the quantity are
964 // replicated in the high 32 bits by the rotation operator (which is done
965 // by rlwinm and friends in 64-bit mode).
967 // Did converting to Repl32 == true change the rotation factor? If it did,
968 // it decreased it by 32.
970 // Was this group coalesced after setting Repl32 to true?
971 bool Repl32Coalesced
;
973 BitGroup(SDValue V
, unsigned R
, unsigned S
, unsigned E
)
974 : V(V
), RLAmt(R
), StartIdx(S
), EndIdx(E
), Repl32(false), Repl32CR(false),
975 Repl32Coalesced(false) {
976 DEBUG(dbgs() << "\tbit group for " << V
.getNode() << " RLAmt = " << R
<<
977 " [" << S
<< ", " << E
<< "]\n");
981 // Information on each (Value, RLAmt) pair (like the number of groups
982 // associated with each) used to choose the lowering method.
983 struct ValueRotInfo
{
985 unsigned RLAmt
= std::numeric_limits
<unsigned>::max();
986 unsigned NumGroups
= 0;
987 unsigned FirstGroupStartIdx
= std::numeric_limits
<unsigned>::max();
990 ValueRotInfo() = default;
992 // For sorting (in reverse order) by NumGroups, and then by
993 // FirstGroupStartIdx.
994 bool operator < (const ValueRotInfo
&Other
) const {
995 // We need to sort so that the non-Repl32 come first because, when we're
996 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
997 // masking operation.
998 if (Repl32
< Other
.Repl32
)
1000 else if (Repl32
> Other
.Repl32
)
1002 else if (NumGroups
> Other
.NumGroups
)
1004 else if (NumGroups
< Other
.NumGroups
)
1006 else if (FirstGroupStartIdx
< Other
.FirstGroupStartIdx
)
1012 using ValueBitsMemoizedValue
= std::pair
<bool, SmallVector
<ValueBit
, 64>>;
1013 using ValueBitsMemoizer
=
1014 DenseMap
<SDValue
, std::unique_ptr
<ValueBitsMemoizedValue
>>;
1015 ValueBitsMemoizer Memoizer
;
1017 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1018 // The bool is true if something interesting was deduced, and false if we're
1019 // providing only a generic representation of V (or something else likewise
1020 // uninteresting for instruction selection) through the SmallVector.
1021 std::pair
<bool, SmallVector
<ValueBit
, 64> *> getValueBits(SDValue V
,
1023 auto &ValueEntry
= Memoizer
[V
];
1025 return std::make_pair(ValueEntry
->first
, &ValueEntry
->second
);
1026 ValueEntry
.reset(new ValueBitsMemoizedValue());
1027 bool &Interesting
= ValueEntry
->first
;
1028 SmallVector
<ValueBit
, 64> &Bits
= ValueEntry
->second
;
1029 Bits
.resize(NumBits
);
1031 switch (V
.getOpcode()) {
1034 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1035 unsigned RotAmt
= V
.getConstantOperandVal(1);
1037 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1039 for (unsigned i
= 0; i
< NumBits
; ++i
)
1040 Bits
[i
] = LHSBits
[i
< RotAmt
? i
+ (NumBits
- RotAmt
) : i
- RotAmt
];
1042 return std::make_pair(Interesting
= true, &Bits
);
1046 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1047 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1049 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1051 for (unsigned i
= ShiftAmt
; i
< NumBits
; ++i
)
1052 Bits
[i
] = LHSBits
[i
- ShiftAmt
];
1054 for (unsigned i
= 0; i
< ShiftAmt
; ++i
)
1055 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1057 return std::make_pair(Interesting
= true, &Bits
);
1061 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1062 unsigned ShiftAmt
= V
.getConstantOperandVal(1);
1064 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1066 for (unsigned i
= 0; i
< NumBits
- ShiftAmt
; ++i
)
1067 Bits
[i
] = LHSBits
[i
+ ShiftAmt
];
1069 for (unsigned i
= NumBits
- ShiftAmt
; i
< NumBits
; ++i
)
1070 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1072 return std::make_pair(Interesting
= true, &Bits
);
1076 if (isa
<ConstantSDNode
>(V
.getOperand(1))) {
1077 uint64_t Mask
= V
.getConstantOperandVal(1);
1079 const SmallVector
<ValueBit
, 64> *LHSBits
;
1080 // Mark this as interesting, only if the LHS was also interesting. This
1081 // prevents the overall procedure from matching a single immediate 'and'
1082 // (which is non-optimal because such an and might be folded with other
1083 // things if we don't select it here).
1084 std::tie(Interesting
, LHSBits
) = getValueBits(V
.getOperand(0), NumBits
);
1086 for (unsigned i
= 0; i
< NumBits
; ++i
)
1087 if (((Mask
>> i
) & 1) == 1)
1088 Bits
[i
] = (*LHSBits
)[i
];
1090 Bits
[i
] = ValueBit(ValueBit::ConstZero
);
1092 return std::make_pair(Interesting
, &Bits
);
1096 const auto &LHSBits
= *getValueBits(V
.getOperand(0), NumBits
).second
;
1097 const auto &RHSBits
= *getValueBits(V
.getOperand(1), NumBits
).second
;
1099 bool AllDisjoint
= true;
1100 for (unsigned i
= 0; i
< NumBits
; ++i
)
1101 if (LHSBits
[i
].isZero())
1102 Bits
[i
] = RHSBits
[i
];
1103 else if (RHSBits
[i
].isZero())
1104 Bits
[i
] = LHSBits
[i
];
1106 AllDisjoint
= false;
1113 return std::make_pair(Interesting
= true, &Bits
);
1117 for (unsigned i
= 0; i
< NumBits
; ++i
)
1118 Bits
[i
] = ValueBit(V
, i
);
1120 return std::make_pair(Interesting
= false, &Bits
);
1123 // For each value (except the constant ones), compute the left-rotate amount
1124 // to get it from its original to final position.
// Fills the RLAmt vector with one entry per element of Bits.
// NOTE(review): several statements of this function (around original lines
// 1126, 1131-1133, 1136 and 1138) are not visible in this excerpt; the
// comments below describe only the statements that are shown.
1125 void computeRotationAmounts() {
// One rotation amount per result bit.
1127 RLAmt
.resize(Bits
.size());
1128 for (unsigned i
= 0; i
< Bits
.size(); ++i
)
1129 if (Bits
[i
].hasValue()) {
// VBI is the bit's index within its source value; i is its index in the
// result.
1130 unsigned VBI
= Bits
[i
].getValueBitIndex();
// Wrap-around form: size - (VBI - i). Presumably this is the branch taken
// when VBI > i (the guarding condition is not visible here) -- confirm.
1134 RLAmt
[i
] = Bits
.size() - (VBI
- i
);
1135 } else if (Bits
[i
].isZero()) {
// Constant-zero bits have no source bit, so they are tagged with the
// UINT32_MAX sentinel instead of a rotation amount.
1137 RLAmt
[i
] = UINT32_MAX
;
// Reached only for a bit that is neither variable nor constant zero.
1139 llvm_unreachable("Unknown value bit type");
1143 // Collect groups of consecutive bits with the same underlying value and
1144 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1145 // they break up groups.
1146 void collectBitGroups(bool LateMask
) {
1149 unsigned LastRLAmt
= RLAmt
[0];
1150 SDValue LastValue
= Bits
[0].hasValue() ? Bits
[0].getValue() : SDValue();
1151 unsigned LastGroupStartIdx
= 0;
1152 for (unsigned i
= 1; i
< Bits
.size(); ++i
) {
1153 unsigned ThisRLAmt
= RLAmt
[i
];
1154 SDValue ThisValue
= Bits
[i
].hasValue() ? Bits
[i
].getValue() : SDValue();
1155 if (LateMask
&& !ThisValue
) {
1156 ThisValue
= LastValue
;
1157 ThisRLAmt
= LastRLAmt
;
1158 // If we're doing late masking, then the first bit group always starts
1159 // at zero (even if the first bits were zero).
1160 if (BitGroups
.empty())
1161 LastGroupStartIdx
= 0;
1164 // If this bit has the same underlying value and the same rotate factor as
1165 // the last one, then they're part of the same group.
1166 if (ThisRLAmt
== LastRLAmt
&& ThisValue
== LastValue
)
1169 if (LastValue
.getNode())
1170 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1172 LastRLAmt
= ThisRLAmt
;
1173 LastValue
= ThisValue
;
1174 LastGroupStartIdx
= i
;
1176 if (LastValue
.getNode())
1177 BitGroups
.push_back(BitGroup(LastValue
, LastRLAmt
, LastGroupStartIdx
,
1180 if (BitGroups
.empty())
1183 // We might be able to combine the first and last groups.
1184 if (BitGroups
.size() > 1) {
1185 // If the first and last groups are the same, then remove the first group
1186 // in favor of the last group, making the ending index of the last group
1187 // equal to the ending index of the to-be-removed first group.
1188 if (BitGroups
[0].StartIdx
== 0 &&
1189 BitGroups
[BitGroups
.size()-1].EndIdx
== Bits
.size()-1 &&
1190 BitGroups
[0].V
== BitGroups
[BitGroups
.size()-1].V
&&
1191 BitGroups
[0].RLAmt
== BitGroups
[BitGroups
.size()-1].RLAmt
) {
1192 DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1193 BitGroups
[BitGroups
.size()-1].EndIdx
= BitGroups
[0].EndIdx
;
1194 BitGroups
.erase(BitGroups
.begin());
1199 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1200 // associated with each. If there is a degeneracy, pick the one that occurs
1201 // first (in the final value).
// Reads BitGroups, accumulates per-key information into the ValueRots map,
// then rebuilds ValueRotsVec from the map and sorts it with std::sort (which
// uses ValueRotInfo's operator<).
// NOTE(review): some statements (around original lines 1203, 1208, 1211) are
// not visible in this excerpt; presumably they clear the map and update the
// remaining ValueRotInfo fields -- confirm.
1202 void collectValueRotInfo() {
1205 for (auto &BG
: BitGroups
) {
// Repl32 groups get their key offset by 64, so a Repl32 group and a
// non-Repl32 group with the same value and rotation amount land in separate
// map entries.
1206 unsigned RLAmtKey
= BG
.RLAmt
+ (BG
.Repl32
? 64 : 0);
// operator[] creates a default-constructed entry the first time a key is seen.
1207 ValueRotInfo
&VRI
= ValueRots
[std::make_pair(BG
.V
, RLAmtKey
)];
// The stored rotation amount is the group's real RLAmt, not the offset key.
1209 VRI
.RLAmt
= BG
.RLAmt
;
1210 VRI
.Repl32
= BG
.Repl32
;
// Track the smallest start index among the groups sharing this key.
1212 VRI
.FirstGroupStartIdx
= std::min(VRI
.FirstGroupStartIdx
, BG
.StartIdx
);
1215 // Now that we've collected the various ValueRotInfo instances, we need to
// sort them; copy the map's values into a vector first so they can be sorted.
1217 ValueRotsVec
.clear();
1218 for (auto &I
: ValueRots
) {
1219 ValueRotsVec
.push_back(I
.second
);
1221 std::sort(ValueRotsVec
.begin(), ValueRotsVec
.end());
1224 // In 64-bit mode, rlwinm and friends have a rotation operator that
1225 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1226 // indices of these instructions can only be in the lower 32 bits, so they
1227 // can only represent some 64-bit bit groups. However, when they can be used,
1228 // the 32-bit replication can be used to represent, as a single bit group,
1229 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1230 // groups when possible. Returns true if any of the bit groups were
1232 void assignRepl32BitGroups() {
1233 // If we have bits like this:
1235 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1236 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1237 // Groups: | RLAmt = 8 | RLAmt = 40 |
1239 // But, making use of a 32-bit operation that replicates the low-order 32
1240 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1243 auto IsAllLow32
= [this](BitGroup
& BG
) {
1244 if (BG
.StartIdx
<= BG
.EndIdx
) {
1245 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
) {
1246 if (!Bits
[i
].hasValue())
1248 if (Bits
[i
].getValueBitIndex() >= 32)
1252 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
) {
1253 if (!Bits
[i
].hasValue())
1255 if (Bits
[i
].getValueBitIndex() >= 32)
1258 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
) {
1259 if (!Bits
[i
].hasValue())
1261 if (Bits
[i
].getValueBitIndex() >= 32)
1269 for (auto &BG
: BitGroups
) {
1270 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32) {
1271 if (IsAllLow32(BG
)) {
1272 if (BG
.RLAmt
>= 32) {
1279 DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
1280 BG
.V
.getNode() << " RLAmt = " << BG
.RLAmt
<<
1281 " [" << BG
.StartIdx
<< ", " << BG
.EndIdx
<< "]\n");
1286 // Now walk through the bit groups, consolidating where possible.
1287 for (auto I
= BitGroups
.begin(); I
!= BitGroups
.end();) {
1288 // We might want to remove this bit group by merging it with the previous
1289 // group (which might be the ending group).
1290 auto IP
= (I
== BitGroups
.begin()) ?
1291 std::prev(BitGroups
.end()) : std::prev(I
);
1292 if (I
->Repl32
&& IP
->Repl32
&& I
->V
== IP
->V
&& I
->RLAmt
== IP
->RLAmt
&&
1293 I
->StartIdx
== (IP
->EndIdx
+ 1) % 64 && I
!= IP
) {
1295 DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
1296 I
->V
.getNode() << " RLAmt = " << I
->RLAmt
<<
1297 " [" << I
->StartIdx
<< ", " << I
->EndIdx
<<
1298 "] with group with range [" <<
1299 IP
->StartIdx
<< ", " << IP
->EndIdx
<< "]\n");
1301 IP
->EndIdx
= I
->EndIdx
;
1302 IP
->Repl32CR
= IP
->Repl32CR
|| I
->Repl32CR
;
1303 IP
->Repl32Coalesced
= true;
1304 I
= BitGroups
.erase(I
);
1307 // There is a special case worth handling: If there is a single group
1308 // covering the entire upper 32 bits, and it can be merged with both
1309 // the next and previous groups (which might be the same group), then
1310 // do so. If it is the same group (so there will be only one group in
1311 // total), then we need to reverse the order of the range so that it
1312 // covers the entire 64 bits.
1313 if (I
->StartIdx
== 32 && I
->EndIdx
== 63) {
1314 assert(std::next(I
) == BitGroups
.end() &&
1315 "bit group ends at index 63 but there is another?");
1316 auto IN
= BitGroups
.begin();
1318 if (IP
->Repl32
&& IN
->Repl32
&& I
->V
== IP
->V
&& I
->V
== IN
->V
&&
1319 (I
->RLAmt
% 32) == IP
->RLAmt
&& (I
->RLAmt
% 32) == IN
->RLAmt
&&
1320 IP
->EndIdx
== 31 && IN
->StartIdx
== 0 && I
!= IP
&&
1323 DEBUG(dbgs() << "\tcombining bit group for " <<
1324 I
->V
.getNode() << " RLAmt = " << I
->RLAmt
<<
1325 " [" << I
->StartIdx
<< ", " << I
->EndIdx
<<
1326 "] with 32-bit replicated groups with ranges [" <<
1327 IP
->StartIdx
<< ", " << IP
->EndIdx
<< "] and [" <<
1328 IN
->StartIdx
<< ", " << IN
->EndIdx
<< "]\n");
1331 // There is only one other group; change it to cover the whole
1332 // range (backward, so that it can still be Repl32 but cover the
1333 // whole 64-bit range).
1336 IP
->Repl32CR
= IP
->Repl32CR
|| I
->RLAmt
>= 32;
1337 IP
->Repl32Coalesced
= true;
1338 I
= BitGroups
.erase(I
);
1340 // There are two separate groups, one before this group and one
1341 // after us (at the beginning). We're going to remove this group,
1342 // but also the group at the very beginning.
1343 IP
->EndIdx
= IN
->EndIdx
;
1344 IP
->Repl32CR
= IP
->Repl32CR
|| IN
->Repl32CR
|| I
->RLAmt
>= 32;
1345 IP
->Repl32Coalesced
= true;
1346 I
= BitGroups
.erase(I
);
1347 BitGroups
.erase(BitGroups
.begin());
1350 // This must be the last group in the vector (and we might have
1351 // just invalidated the iterator above), so break here.
// Build an i32 target constant holding Imm, attributed to location dl, via
// CurDAG->getTargetConstant.
1361 SDValue
getI32Imm(unsigned Imm
, const SDLoc
&dl
) {
1362 return CurDAG
->getTargetConstant(Imm
, dl
, MVT::i32
);
// Build a 64-bit mask by scanning Bits and OR-ing in (1 << i) for selected
// positions i.
// NOTE(review): the declaration of Mask, the statement guarded by
// hasValue(), and the return statement are not visible in this excerpt.
// Presumably variable bits are skipped, so Mask collects the positions that
// do not carry a variable value -- confirm, including whether the mask is
// inverted before being returned.
1365 uint64_t getZerosMask() {
1367 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1368 if (Bits
[i
].hasValue())
// UINT64_C(1) keeps the shift in 64-bit arithmetic for i >= 32.
1370 Mask
|= (UINT64_C(1) << i
);
1376 // Depending on the number of groups for a particular value, it might be
1377 // better to rotate, mask explicitly (using andi/andis), and then or the
1378 // result. Select this part of the result first.
1379 void SelectAndParts32(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
1380 if (BPermRewriterNoMasking
)
1383 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
1385 for (unsigned i
= 0; i
< Bits
.size(); ++i
) {
1386 if (!Bits
[i
].hasValue() || Bits
[i
].getValue() != VRI
.V
)
1388 if (RLAmt
[i
] != VRI
.RLAmt
)
1393 // Compute the masks for andi/andis that would be necessary.
1394 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
1395 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1396 "No set bits in mask for value bit groups");
1397 bool NeedsRotate
= VRI
.RLAmt
!= 0;
1399 // We're trying to minimize the number of instructions. If we have one
1400 // group, using one of andi/andis can break even. If we have three
1401 // groups, we can use both andi and andis and break even (to use both
1402 // andi and andis we also need to or the results together). We need four
1403 // groups if we also need to rotate. To use andi/andis we need to do more
1404 // than break even because rotate-and-mask instructions tend to be easier
1407 // FIXME: We've biased here against using andi/andis, which is right for
1408 // POWER cores, but not optimal everywhere. For example, on the A2,
1409 // andi/andis have single-cycle latency whereas the rotate-and-mask
1410 // instructions take two cycles, and it would be better to bias toward
1411 // andi/andis in break-even cases.
1413 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
1414 (unsigned) (ANDIMask
!= 0) +
1415 (unsigned) (ANDISMask
!= 0) +
1416 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0) +
1417 (unsigned) (bool) Res
;
1419 DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode() <<
1420 " RL: " << VRI
.RLAmt
<< ":" <<
1421 "\n\t\t\tisel using masking: " << NumAndInsts
<<
1422 " using rotates: " << VRI
.NumGroups
<< "\n");
1424 if (NumAndInsts
>= VRI
.NumGroups
)
1427 DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1429 if (InstCnt
) *InstCnt
+= NumAndInsts
;
1434 { VRI
.V
, getI32Imm(VRI
.RLAmt
, dl
), getI32Imm(0, dl
),
1435 getI32Imm(31, dl
) };
1436 VRot
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
1442 SDValue ANDIVal
, ANDISVal
;
1444 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo
, dl
, MVT::i32
,
1445 VRot
, getI32Imm(ANDIMask
, dl
)), 0);
1447 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo
, dl
, MVT::i32
,
1448 VRot
, getI32Imm(ANDISMask
, dl
)), 0);
1452 TotalVal
= ANDISVal
;
1456 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1457 ANDIVal
, ANDISVal
), 0);
1462 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1465 // Now, remove all groups with this underlying value and rotation
1467 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1468 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
1473 // Instruction selection for the 32-bit case.
1474 SDNode
*Select32(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
1478 if (InstCnt
) *InstCnt
= 0;
1480 // Take care of cases that should use andi/andis first.
1481 SelectAndParts32(dl
, Res
, InstCnt
);
1483 // If we've not yet selected a 'starting' instruction, and we have no zeros
1484 // to fill in, select the (Value, RLAmt) with the highest priority (largest
1485 // number of groups), and start with this rotated value.
1486 if ((!HasZeros
|| LateMask
) && !Res
) {
1487 ValueRotInfo
&VRI
= ValueRotsVec
[0];
1489 if (InstCnt
) *InstCnt
+= 1;
1491 { VRI
.V
, getI32Imm(VRI
.RLAmt
, dl
), getI32Imm(0, dl
),
1492 getI32Imm(31, dl
) };
1493 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
),
1499 // Now, remove all groups with this underlying value and rotation factor.
1500 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1501 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
;
1505 if (InstCnt
) *InstCnt
+= BitGroups
.size();
1507 // Insert the other groups (one at a time).
1508 for (auto &BG
: BitGroups
) {
1511 { BG
.V
, getI32Imm(BG
.RLAmt
, dl
),
1512 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
1513 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
1514 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
1517 { Res
, BG
.V
, getI32Imm(BG
.RLAmt
, dl
),
1518 getI32Imm(Bits
.size() - BG
.EndIdx
- 1, dl
),
1519 getI32Imm(Bits
.size() - BG
.StartIdx
- 1, dl
) };
1520 Res
= SDValue(CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
), 0);
1525 unsigned Mask
= (unsigned) getZerosMask();
1527 unsigned ANDIMask
= (Mask
& UINT16_MAX
), ANDISMask
= Mask
>> 16;
1528 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1529 "No set bits in zeros mask?");
1531 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
1532 (unsigned) (ANDISMask
!= 0) +
1533 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
1535 SDValue ANDIVal
, ANDISVal
;
1537 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo
, dl
, MVT::i32
,
1538 Res
, getI32Imm(ANDIMask
, dl
)), 0);
1540 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo
, dl
, MVT::i32
,
1541 Res
, getI32Imm(ANDISMask
, dl
)), 0);
1548 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR
, dl
, MVT::i32
,
1549 ANDIVal
, ANDISVal
), 0);
1552 return Res
.getNode();
1555 unsigned SelectRotMask64Count(unsigned RLAmt
, bool Repl32
,
1556 unsigned MaskStart
, unsigned MaskEnd
,
1558 // In the notation used by the instructions, 'start' and 'end' are reversed
1559 // because bits are counted from high to low order.
1560 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1561 InstMaskEnd
= 64 - MaskStart
- 1;
1566 if ((!IsIns
&& (InstMaskEnd
== 63 || InstMaskStart
== 0)) ||
1567 InstMaskEnd
== 63 - RLAmt
)
1573 // For 64-bit values, not all combinations of rotates and masks are
1574 // available. Produce one if it is available.
1575 SDValue
SelectRotMask64(SDValue V
, const SDLoc
&dl
, unsigned RLAmt
,
1576 bool Repl32
, unsigned MaskStart
, unsigned MaskEnd
,
1577 unsigned *InstCnt
= nullptr) {
1578 // In the notation used by the instructions, 'start' and 'end' are reversed
1579 // because bits are counted from high to low order.
1580 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1581 InstMaskEnd
= 64 - MaskStart
- 1;
1583 if (InstCnt
) *InstCnt
+= 1;
1586 // This rotation amount assumes that the lower 32 bits of the quantity
1587 // are replicated in the high 32 bits by the rotation operator (which is
1588 // done by rlwinm and friends).
1589 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
1590 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
1592 { V
, getI32Imm(RLAmt
, dl
), getI32Imm(InstMaskStart
- 32, dl
),
1593 getI32Imm(InstMaskEnd
- 32, dl
) };
1594 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM8
, dl
, MVT::i64
,
1598 if (InstMaskEnd
== 63) {
1600 { V
, getI32Imm(RLAmt
, dl
), getI32Imm(InstMaskStart
, dl
) };
1601 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Ops
), 0);
1604 if (InstMaskStart
== 0) {
1606 { V
, getI32Imm(RLAmt
, dl
), getI32Imm(InstMaskEnd
, dl
) };
1607 return SDValue(CurDAG
->getMachineNode(PPC::RLDICR
, dl
, MVT::i64
, Ops
), 0);
1610 if (InstMaskEnd
== 63 - RLAmt
) {
1612 { V
, getI32Imm(RLAmt
, dl
), getI32Imm(InstMaskStart
, dl
) };
1613 return SDValue(CurDAG
->getMachineNode(PPC::RLDIC
, dl
, MVT::i64
, Ops
), 0);
1616 // We cannot do this with a single instruction, so we'll use two. The
1617 // problem is that we're not free to choose both a rotation amount and mask
1618 // start and end independently. We can choose an arbitrary mask start and
1619 // end, but then the rotation amount is fixed. Rotation, however, can be
1620 // inverted, and so by applying an "inverse" rotation first, we can get the
1622 if (InstCnt
) *InstCnt
+= 1;
1624 // The rotation mask for the second instruction must be MaskStart.
1625 unsigned RLAmt2
= MaskStart
;
1626 // The first instruction must rotate V so that the overall rotation amount
1628 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
1630 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
1631 return SelectRotMask64(V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
1634 // For 64-bit values, not all combinations of rotates and masks are
1635 // available. Produce a rotate-mask-and-insert if one is available.
1636 SDValue
SelectRotMaskIns64(SDValue Base
, SDValue V
, const SDLoc
&dl
,
1637 unsigned RLAmt
, bool Repl32
, unsigned MaskStart
,
1638 unsigned MaskEnd
, unsigned *InstCnt
= nullptr) {
1639 // In the notation used by the instructions, 'start' and 'end' are reversed
1640 // because bits are counted from high to low order.
1641 unsigned InstMaskStart
= 64 - MaskEnd
- 1,
1642 InstMaskEnd
= 64 - MaskStart
- 1;
1644 if (InstCnt
) *InstCnt
+= 1;
1647 // This rotation amount assumes that the lower 32 bits of the quantity
1648 // are replicated in the high 32 bits by the rotation operator (which is
1649 // done by rlwinm and friends).
1650 assert(InstMaskStart
>= 32 && "Mask cannot start out of range");
1651 assert(InstMaskEnd
>= 32 && "Mask cannot end out of range");
1653 { Base
, V
, getI32Imm(RLAmt
, dl
), getI32Imm(InstMaskStart
- 32, dl
),
1654 getI32Imm(InstMaskEnd
- 32, dl
) };
1655 return SDValue(CurDAG
->getMachineNode(PPC::RLWIMI8
, dl
, MVT::i64
,
1659 if (InstMaskEnd
== 63 - RLAmt
) {
1661 { Base
, V
, getI32Imm(RLAmt
, dl
), getI32Imm(InstMaskStart
, dl
) };
1662 return SDValue(CurDAG
->getMachineNode(PPC::RLDIMI
, dl
, MVT::i64
, Ops
), 0);
1665 // We cannot do this with a single instruction, so we'll use two. The
1666 // problem is that we're not free to choose both a rotation amount and mask
1667 // start and end independently. We can choose an arbitrary mask start and
1668 // end, but then the rotation amount is fixed. Rotation, however, can be
1669 // inverted, and so by applying an "inverse" rotation first, we can get the
1671 if (InstCnt
) *InstCnt
+= 1;
1673 // The rotation mask for the second instruction must be MaskStart.
1674 unsigned RLAmt2
= MaskStart
;
1675 // The first instruction must rotate V so that the overall rotation amount
1677 unsigned RLAmt1
= (64 + RLAmt
- RLAmt2
) % 64;
1679 V
= SelectRotMask64(V
, dl
, RLAmt1
, false, 0, 63);
1680 return SelectRotMaskIns64(Base
, V
, dl
, RLAmt2
, false, MaskStart
, MaskEnd
);
1683 void SelectAndParts64(const SDLoc
&dl
, SDValue
&Res
, unsigned *InstCnt
) {
1684 if (BPermRewriterNoMasking
)
1687 // The idea here is the same as in the 32-bit version, but with additional
1688 // complications from the fact that Repl32 might be true. Because we
1689 // aggressively convert bit groups to Repl32 form (which, for small
1690 // rotation factors, involves no other change), and then coalesce, it might
1691 // be the case that a single 64-bit masking operation could handle both
1692 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
1693 // form allowed coalescing, then we must use a 32-bit rotation in order to
1694 // completely capture the new combined bit group.
1696 for (ValueRotInfo
&VRI
: ValueRotsVec
) {
1699 // We need to add to the mask all bits from the associated bit groups.
1700 // If Repl32 is false, we need to add bits from bit groups that have
1701 // Repl32 true, but are trivially convertible to Repl32 false. Such a
1702 // group is trivially convertible if it overlaps only with the lower 32
1703 // bits, and the group has not been coalesced.
1704 auto MatchingBG
= [VRI
](const BitGroup
&BG
) {
1708 unsigned EffRLAmt
= BG
.RLAmt
;
1709 if (!VRI
.Repl32
&& BG
.Repl32
) {
1710 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
<= BG
.EndIdx
&&
1711 !BG
.Repl32Coalesced
) {
1717 } else if (VRI
.Repl32
!= BG
.Repl32
) {
1721 return VRI
.RLAmt
== EffRLAmt
;
1724 for (auto &BG
: BitGroups
) {
1725 if (!MatchingBG(BG
))
1728 if (BG
.StartIdx
<= BG
.EndIdx
) {
1729 for (unsigned i
= BG
.StartIdx
; i
<= BG
.EndIdx
; ++i
)
1730 Mask
|= (UINT64_C(1) << i
);
1732 for (unsigned i
= BG
.StartIdx
; i
< Bits
.size(); ++i
)
1733 Mask
|= (UINT64_C(1) << i
);
1734 for (unsigned i
= 0; i
<= BG
.EndIdx
; ++i
)
1735 Mask
|= (UINT64_C(1) << i
);
1739 // We can use the 32-bit andi/andis technique if the mask does not
1740 // require any higher-order bits. This can save an instruction compared
1741 // to always using the general 64-bit technique.
1742 bool Use32BitInsts
= isUInt
<32>(Mask
);
1743 // Compute the masks for andi/andis that would be necessary.
1744 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
1745 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
1747 bool NeedsRotate
= VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
));
1749 unsigned NumAndInsts
= (unsigned) NeedsRotate
+
1750 (unsigned) (bool) Res
;
1752 NumAndInsts
+= (unsigned) (ANDIMask
!= 0) + (unsigned) (ANDISMask
!= 0) +
1753 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
1755 NumAndInsts
+= selectI64ImmInstrCount(Mask
) + /* and */ 1;
1757 unsigned NumRLInsts
= 0;
1758 bool FirstBG
= true;
1759 bool MoreBG
= false;
1760 for (auto &BG
: BitGroups
) {
1761 if (!MatchingBG(BG
)) {
1766 SelectRotMask64Count(BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
, BG
.EndIdx
,
1771 DEBUG(dbgs() << "\t\trotation groups for " << VRI
.V
.getNode() <<
1772 " RL: " << VRI
.RLAmt
<< (VRI
.Repl32
? " (32):" : ":") <<
1773 "\n\t\t\tisel using masking: " << NumAndInsts
<<
1774 " using rotates: " << NumRLInsts
<< "\n");
1776 // When we'd use andi/andis, we bias toward using the rotates (andi only
1777 // has a record form, and is cracked on POWER cores). However, when using
1778 // general 64-bit constant formation, bias toward the constant form,
1779 // because that exposes more opportunities for CSE.
1780 if (NumAndInsts
> NumRLInsts
)
1782 // When merging multiple bit groups, instruction or is used.
1783 // But when rotate is used, rldimi can insert the rotated value into any
1784 // register, so instruction or can be avoided.
1785 if ((Use32BitInsts
|| MoreBG
) && NumAndInsts
== NumRLInsts
)
1788 DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1790 if (InstCnt
) *InstCnt
+= NumAndInsts
;
1793 // We actually need to generate a rotation if we have a non-zero rotation
1794 // factor or, in the Repl32 case, if we care about any of the
1795 // higher-order replicated bits. In the latter case, we generate a mask
1796 // backward so that it actually includes the entire 64 bits.
1797 if (VRI
.RLAmt
|| (VRI
.Repl32
&& !isUInt
<32>(Mask
)))
1798 VRot
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
1799 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63);
1804 if (Use32BitInsts
) {
1805 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1806 "No set bits in mask when using 32-bit ands for 64-bit value");
1808 SDValue ANDIVal
, ANDISVal
;
1810 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo8
, dl
, MVT::i64
,
1811 VRot
, getI32Imm(ANDIMask
, dl
)), 0);
1813 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo8
, dl
, MVT::i64
,
1814 VRot
, getI32Imm(ANDISMask
, dl
)), 0);
1817 TotalVal
= ANDISVal
;
1821 TotalVal
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
1822 ANDIVal
, ANDISVal
), 0);
1824 TotalVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
1826 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
1827 VRot
, TotalVal
), 0);
1833 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
1836 // Now, remove all groups with this underlying value and rotation
1838 eraseMatchingBitGroups(MatchingBG
);
1842 // Instruction selection for the 64-bit case.
1843 SDNode
*Select64(SDNode
*N
, bool LateMask
, unsigned *InstCnt
) {
1847 if (InstCnt
) *InstCnt
= 0;
1849 // Take care of cases that should use andi/andis first.
1850 SelectAndParts64(dl
, Res
, InstCnt
);
1852 // If we've not yet selected a 'starting' instruction, and we have no zeros
1853 // to fill in, select the (Value, RLAmt) with the highest priority (largest
1854 // number of groups), and start with this rotated value.
1855 if ((!HasZeros
|| LateMask
) && !Res
) {
1856 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
1857 // groups will come first, and so the VRI representing the largest number
1858 // of groups might not be first (it might be the first Repl32 groups).
1859 unsigned MaxGroupsIdx
= 0;
1860 if (!ValueRotsVec
[0].Repl32
) {
1861 for (unsigned i
= 0, ie
= ValueRotsVec
.size(); i
< ie
; ++i
)
1862 if (ValueRotsVec
[i
].Repl32
) {
1863 if (ValueRotsVec
[i
].NumGroups
> ValueRotsVec
[0].NumGroups
)
1869 ValueRotInfo
&VRI
= ValueRotsVec
[MaxGroupsIdx
];
1870 bool NeedsRotate
= false;
1873 } else if (VRI
.Repl32
) {
1874 for (auto &BG
: BitGroups
) {
1875 if (BG
.V
!= VRI
.V
|| BG
.RLAmt
!= VRI
.RLAmt
||
1876 BG
.Repl32
!= VRI
.Repl32
)
1879 // We don't need a rotate if the bit group is confined to the lower
1881 if (BG
.StartIdx
< 32 && BG
.EndIdx
< 32 && BG
.StartIdx
< BG
.EndIdx
)
1890 Res
= SelectRotMask64(VRI
.V
, dl
, VRI
.RLAmt
, VRI
.Repl32
,
1891 VRI
.Repl32
? 31 : 0, VRI
.Repl32
? 30 : 63,
1896 // Now, remove all groups with this underlying value and rotation factor.
1898 eraseMatchingBitGroups([VRI
](const BitGroup
&BG
) {
1899 return BG
.V
== VRI
.V
&& BG
.RLAmt
== VRI
.RLAmt
&&
1900 BG
.Repl32
== VRI
.Repl32
;
1904 // Because 64-bit rotates are more flexible than inserts, we might have a
1905 // preference regarding which one we do first (to save one instruction).
1907 for (auto I
= BitGroups
.begin(), IE
= BitGroups
.end(); I
!= IE
; ++I
) {
1908 if (SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
1910 SelectRotMask64Count(I
->RLAmt
, I
->Repl32
, I
->StartIdx
, I
->EndIdx
,
1912 if (I
!= BitGroups
.begin()) {
1915 BitGroups
.insert(BitGroups
.begin(), BG
);
1922 // Insert the other groups (one at a time).
1923 for (auto &BG
: BitGroups
) {
1925 Res
= SelectRotMask64(BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
, BG
.StartIdx
,
1926 BG
.EndIdx
, InstCnt
);
1928 Res
= SelectRotMaskIns64(Res
, BG
.V
, dl
, BG
.RLAmt
, BG
.Repl32
,
1929 BG
.StartIdx
, BG
.EndIdx
, InstCnt
);
1933 uint64_t Mask
= getZerosMask();
1935 // We can use the 32-bit andi/andis technique if the mask does not
1936 // require any higher-order bits. This can save an instruction compared
1937 // to always using the general 64-bit technique.
1938 bool Use32BitInsts
= isUInt
<32>(Mask
);
1939 // Compute the masks for andi/andis that would be necessary.
1940 unsigned ANDIMask
= (Mask
& UINT16_MAX
),
1941 ANDISMask
= (Mask
>> 16) & UINT16_MAX
;
1943 if (Use32BitInsts
) {
1944 assert((ANDIMask
!= 0 || ANDISMask
!= 0) &&
1945 "No set bits in mask when using 32-bit ands for 64-bit value");
1947 if (InstCnt
) *InstCnt
+= (unsigned) (ANDIMask
!= 0) +
1948 (unsigned) (ANDISMask
!= 0) +
1949 (unsigned) (ANDIMask
!= 0 && ANDISMask
!= 0);
1951 SDValue ANDIVal
, ANDISVal
;
1953 ANDIVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDIo8
, dl
, MVT::i64
,
1954 Res
, getI32Imm(ANDIMask
, dl
)), 0);
1956 ANDISVal
= SDValue(CurDAG
->getMachineNode(PPC::ANDISo8
, dl
, MVT::i64
,
1957 Res
, getI32Imm(ANDISMask
, dl
)), 0);
1964 Res
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
1965 ANDIVal
, ANDISVal
), 0);
1967 if (InstCnt
) *InstCnt
+= selectI64ImmInstrCount(Mask
) + /* and */ 1;
1969 SDValue MaskVal
= SDValue(selectI64Imm(CurDAG
, dl
, Mask
), 0);
1971 SDValue(CurDAG
->getMachineNode(PPC::AND8
, dl
, MVT::i64
,
1976 return Res
.getNode();
1979 SDNode
*Select(SDNode
*N
, bool LateMask
, unsigned *InstCnt
= nullptr) {
1980 // Fill in BitGroups.
1981 collectBitGroups(LateMask
);
1982 if (BitGroups
.empty())
1985 // For 64-bit values, figure out when we can use 32-bit instructions.
1986 if (Bits
.size() == 64)
1987 assignRepl32BitGroups();
1989 // Fill in ValueRotsVec.
1990 collectValueRotInfo();
1992 if (Bits
.size() == 32) {
1993 return Select32(N
, LateMask
, InstCnt
);
1995 assert(Bits
.size() == 64 && "Not 64 bits here?");
1996 return Select64(N
, LateMask
, InstCnt
);
2002 void eraseMatchingBitGroups(function_ref
<bool(const BitGroup
&)> F
) {
2003 BitGroups
.erase(remove_if(BitGroups
, F
), BitGroups
.end());
2006 SmallVector
<ValueBit
, 64> Bits
;
2009 SmallVector
<unsigned, 64> RLAmt
;
2011 SmallVector
<BitGroup
, 16> BitGroups
;
2013 DenseMap
<std::pair
<SDValue
, unsigned>, ValueRotInfo
> ValueRots
;
2014 SmallVector
<ValueRotInfo
, 16> ValueRotsVec
;
2016 SelectionDAG
*CurDAG
;
2019 BitPermutationSelector(SelectionDAG
*DAG
)
2022 // Here we try to match complex bit permutations into a set of
2023 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2024 // known to produce optimal code for common cases (like i32 byte swapping).
2025 SDNode
*Select(SDNode
*N
) {
2028 getValueBits(SDValue(N
, 0), N
->getValueType(0).getSizeInBits());
2031 Bits
= std::move(*Result
.second
);
2033 DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2034 " selection for: ");
2035 DEBUG(N
->dump(CurDAG
));
2037 // Fill in RLAmt and set HasZeros.
2038 computeRotationAmounts();
2041 return Select(N
, false);
2043 // We currently have two techniques for handling results with zeros: early
2044 // masking (the default) and late masking. Late masking is sometimes more
2045 // efficient, but because the structure of the bit groups is different, it
2046 // is hard to tell without generating both and comparing the results. With
2047 // late masking, we ignore zeros in the resulting value when inserting each
2048 // set of bit groups, and then mask in the zeros at the end. With early
2049 // masking, we only insert the non-zero parts of the result at every step.
2051 unsigned InstCnt
, InstCntLateMask
;
2052 DEBUG(dbgs() << "\tEarly masking:\n");
2053 SDNode
*RN
= Select(N
, false, &InstCnt
);
2054 DEBUG(dbgs() << "\t\tisel would use " << InstCnt
<< " instructions\n");
2056 DEBUG(dbgs() << "\tLate masking:\n");
2057 SDNode
*RNLM
= Select(N
, true, &InstCntLateMask
);
2058 DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
<<
2061 if (InstCnt
<= InstCntLateMask
) {
2062 DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2066 DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2071 } // end anonymous namespace
2073 bool PPCDAGToDAGISel::tryBitPermutation(SDNode
*N
) {
2074 if (N
->getValueType(0) != MVT::i32
&&
2075 N
->getValueType(0) != MVT::i64
)
2078 if (!UseBitPermRewriter
)
2081 switch (N
->getOpcode()) {
2088 BitPermutationSelector
BPS(CurDAG
);
2089 if (SDNode
*New
= BPS
.Select(N
)) {
2090 ReplaceNode(N
, New
);
2100 /// SelectCC - Select a comparison of the specified values with the specified
2101 /// condition code, returning the CR# of the expression.
2102 SDValue
PPCDAGToDAGISel::SelectCC(SDValue LHS
, SDValue RHS
, ISD::CondCode CC
,
2104 // Always select the LHS.
2107 if (LHS
.getValueType() == MVT::i32
) {
2109 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
2110 if (isInt32Immediate(RHS
, Imm
)) {
2111 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
2112 if (isUInt
<16>(Imm
))
2113 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
2114 getI32Imm(Imm
& 0xFFFF, dl
)),
2116 // If this is a 16-bit signed immediate, fold it.
2117 if (isInt
<16>((int)Imm
))
2118 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
2119 getI32Imm(Imm
& 0xFFFF, dl
)),
2122 // For non-equality comparisons, the default code would materialize the
2123 // constant, then compare against it, like this:
2125 // ori r2, r2, 22136
2127 // Since we are just comparing for equality, we can emit this instead:
2128 // xoris r0,r3,0x1234
2129 // cmplwi cr0,r0,0x5678
2131 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS
, dl
, MVT::i32
, LHS
,
2132 getI32Imm(Imm
>> 16, dl
)), 0);
2133 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, Xor
,
2134 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
2137 } else if (ISD::isUnsignedIntSetCC(CC
)) {
2138 if (isInt32Immediate(RHS
, Imm
) && isUInt
<16>(Imm
))
2139 return SDValue(CurDAG
->getMachineNode(PPC::CMPLWI
, dl
, MVT::i32
, LHS
,
2140 getI32Imm(Imm
& 0xFFFF, dl
)), 0);
2144 if (isIntS16Immediate(RHS
, SImm
))
2145 return SDValue(CurDAG
->getMachineNode(PPC::CMPWI
, dl
, MVT::i32
, LHS
,
2146 getI32Imm((int)SImm
& 0xFFFF,
2151 } else if (LHS
.getValueType() == MVT::i64
) {
2153 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
2154 if (isInt64Immediate(RHS
.getNode(), Imm
)) {
2155 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
2156 if (isUInt
<16>(Imm
))
2157 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
2158 getI32Imm(Imm
& 0xFFFF, dl
)),
2160 // If this is a 16-bit signed immediate, fold it.
2162 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
2163 getI32Imm(Imm
& 0xFFFF, dl
)),
2166 // For non-equality comparisons, the default code would materialize the
2167 // constant, then compare against it, like this:
2169 // ori r2, r2, 22136
2171 // Since we are just comparing for equality, we can emit this instead:
2172 // xoris r0,r3,0x1234
2173 // cmpldi cr0,r0,0x5678
2175 if (isUInt
<32>(Imm
)) {
2176 SDValue
Xor(CurDAG
->getMachineNode(PPC::XORIS8
, dl
, MVT::i64
, LHS
,
2177 getI64Imm(Imm
>> 16, dl
)), 0);
2178 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, Xor
,
2179 getI64Imm(Imm
& 0xFFFF, dl
)),
2184 } else if (ISD::isUnsignedIntSetCC(CC
)) {
2185 if (isInt64Immediate(RHS
.getNode(), Imm
) && isUInt
<16>(Imm
))
2186 return SDValue(CurDAG
->getMachineNode(PPC::CMPLDI
, dl
, MVT::i64
, LHS
,
2187 getI64Imm(Imm
& 0xFFFF, dl
)), 0);
2191 if (isIntS16Immediate(RHS
, SImm
))
2192 return SDValue(CurDAG
->getMachineNode(PPC::CMPDI
, dl
, MVT::i64
, LHS
,
2193 getI64Imm(SImm
& 0xFFFF, dl
)),
2197 } else if (LHS
.getValueType() == MVT::f32
) {
2200 assert(LHS
.getValueType() == MVT::f64
&& "Unknown vt!");
2201 Opc
= PPCSubTarget
->hasVSX() ? PPC::XSCMPUDP
: PPC::FCMPUD
;
2203 return SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::i32
, LHS
, RHS
), 0);
2206 static PPC::Predicate
getPredicateForSetCC(ISD::CondCode CC
) {
2212 llvm_unreachable("Should be lowered by legalize!");
2213 default: llvm_unreachable("Unknown condition!");
2215 case ISD::SETEQ
: return PPC::PRED_EQ
;
2217 case ISD::SETNE
: return PPC::PRED_NE
;
2219 case ISD::SETLT
: return PPC::PRED_LT
;
2221 case ISD::SETLE
: return PPC::PRED_LE
;
2223 case ISD::SETGT
: return PPC::PRED_GT
;
2225 case ISD::SETGE
: return PPC::PRED_GE
;
2226 case ISD::SETO
: return PPC::PRED_NU
;
2227 case ISD::SETUO
: return PPC::PRED_UN
;
2228 // These two are invalid for floating point. Assume we have int.
2229 case ISD::SETULT
: return PPC::PRED_LT
;
2230 case ISD::SETUGT
: return PPC::PRED_GT
;
2234 /// getCRIdxForSetCC - Return the index of the condition register field
2235 /// associated with the SetCC condition, and whether or not the field is
2236 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
2237 static unsigned getCRIdxForSetCC(ISD::CondCode CC
, bool &Invert
) {
2240 default: llvm_unreachable("Unknown condition!");
2242 case ISD::SETLT
: return 0; // Bit #0 = SETOLT
2244 case ISD::SETGT
: return 1; // Bit #1 = SETOGT
2246 case ISD::SETEQ
: return 2; // Bit #2 = SETOEQ
2247 case ISD::SETUO
: return 3; // Bit #3 = SETUO
2249 case ISD::SETGE
: Invert
= true; return 0; // !Bit #0 = SETUGE
2251 case ISD::SETLE
: Invert
= true; return 1; // !Bit #1 = SETULE
2253 case ISD::SETNE
: Invert
= true; return 2; // !Bit #2 = SETUNE
2254 case ISD::SETO
: Invert
= true; return 3; // !Bit #3 = SETO
2259 llvm_unreachable("Invalid branch code: should be expanded by legalize");
2260 // These are invalid for floating point. Assume integer.
2261 case ISD::SETULT
: return 0;
2262 case ISD::SETUGT
: return 1;
2266 // getVCmpInst: return the vector compare instruction for the specified
2267 // vector type and condition code. Since this is for altivec specific code,
2268 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
2269 static unsigned int getVCmpInst(MVT VecVT
, ISD::CondCode CC
,
2270 bool HasVSX
, bool &Swap
, bool &Negate
) {
2274 if (VecVT
.isFloatingPoint()) {
2275 /* Handle some cases by swapping input operands. */
2277 case ISD::SETLE
: CC
= ISD::SETGE
; Swap
= true; break;
2278 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
2279 case ISD::SETOLE
: CC
= ISD::SETOGE
; Swap
= true; break;
2280 case ISD::SETOLT
: CC
= ISD::SETOGT
; Swap
= true; break;
2281 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
2282 case ISD::SETUGT
: CC
= ISD::SETULT
; Swap
= true; break;
2285 /* Handle some cases by negating the result. */
2287 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
2288 case ISD::SETUNE
: CC
= ISD::SETOEQ
; Negate
= true; break;
2289 case ISD::SETULE
: CC
= ISD::SETOGT
; Negate
= true; break;
2290 case ISD::SETULT
: CC
= ISD::SETOGE
; Negate
= true; break;
2293 /* We have instructions implementing the remaining cases. */
2297 if (VecVT
== MVT::v4f32
)
2298 return HasVSX
? PPC::XVCMPEQSP
: PPC::VCMPEQFP
;
2299 else if (VecVT
== MVT::v2f64
)
2300 return PPC::XVCMPEQDP
;
2304 if (VecVT
== MVT::v4f32
)
2305 return HasVSX
? PPC::XVCMPGTSP
: PPC::VCMPGTFP
;
2306 else if (VecVT
== MVT::v2f64
)
2307 return PPC::XVCMPGTDP
;
2311 if (VecVT
== MVT::v4f32
)
2312 return HasVSX
? PPC::XVCMPGESP
: PPC::VCMPGEFP
;
2313 else if (VecVT
== MVT::v2f64
)
2314 return PPC::XVCMPGEDP
;
2319 llvm_unreachable("Invalid floating-point vector compare condition");
2321 /* Handle some cases by swapping input operands. */
2323 case ISD::SETGE
: CC
= ISD::SETLE
; Swap
= true; break;
2324 case ISD::SETLT
: CC
= ISD::SETGT
; Swap
= true; break;
2325 case ISD::SETUGE
: CC
= ISD::SETULE
; Swap
= true; break;
2326 case ISD::SETULT
: CC
= ISD::SETUGT
; Swap
= true; break;
2329 /* Handle some cases by negating the result. */
2331 case ISD::SETNE
: CC
= ISD::SETEQ
; Negate
= true; break;
2332 case ISD::SETUNE
: CC
= ISD::SETUEQ
; Negate
= true; break;
2333 case ISD::SETLE
: CC
= ISD::SETGT
; Negate
= true; break;
2334 case ISD::SETULE
: CC
= ISD::SETUGT
; Negate
= true; break;
2337 /* We have instructions implementing the remaining cases. */
2341 if (VecVT
== MVT::v16i8
)
2342 return PPC::VCMPEQUB
;
2343 else if (VecVT
== MVT::v8i16
)
2344 return PPC::VCMPEQUH
;
2345 else if (VecVT
== MVT::v4i32
)
2346 return PPC::VCMPEQUW
;
2347 else if (VecVT
== MVT::v2i64
)
2348 return PPC::VCMPEQUD
;
2351 if (VecVT
== MVT::v16i8
)
2352 return PPC::VCMPGTSB
;
2353 else if (VecVT
== MVT::v8i16
)
2354 return PPC::VCMPGTSH
;
2355 else if (VecVT
== MVT::v4i32
)
2356 return PPC::VCMPGTSW
;
2357 else if (VecVT
== MVT::v2i64
)
2358 return PPC::VCMPGTSD
;
2361 if (VecVT
== MVT::v16i8
)
2362 return PPC::VCMPGTUB
;
2363 else if (VecVT
== MVT::v8i16
)
2364 return PPC::VCMPGTUH
;
2365 else if (VecVT
== MVT::v4i32
)
2366 return PPC::VCMPGTUW
;
2367 else if (VecVT
== MVT::v2i64
)
2368 return PPC::VCMPGTUD
;
2373 llvm_unreachable("Invalid integer vector compare condition");
2377 bool PPCDAGToDAGISel::trySETCC(SDNode
*N
) {
2380 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
2382 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
2383 bool isPPC64
= (PtrVT
== MVT::i64
);
2385 if (!PPCSubTarget
->useCRBits() &&
2386 isInt32Immediate(N
->getOperand(1), Imm
)) {
2387 // We can codegen setcc op, imm very efficiently compared to a brcond.
2388 // Check for those cases here.
2391 SDValue Op
= N
->getOperand(0);
2395 Op
= SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Op
), 0);
2396 SDValue Ops
[] = { Op
, getI32Imm(27, dl
), getI32Imm(5, dl
),
2397 getI32Imm(31, dl
) };
2398 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
2404 SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
2405 Op
, getI32Imm(~0U, dl
)), 0);
2406 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, AD
, Op
, AD
.getValue(1));
2410 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
2411 getI32Imm(31, dl
) };
2412 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
2417 SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Op
), 0);
2418 T
= SDValue(CurDAG
->getMachineNode(PPC::ANDC
, dl
, MVT::i32
, T
, Op
), 0);
2419 SDValue Ops
[] = { T
, getI32Imm(1, dl
), getI32Imm(31, dl
),
2420 getI32Imm(31, dl
) };
2421 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
2425 } else if (Imm
== ~0U) { // setcc op, -1
2426 SDValue Op
= N
->getOperand(0);
2431 Op
= SDValue(CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
2432 Op
, getI32Imm(1, dl
)), 0);
2433 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
,
2434 SDValue(CurDAG
->getMachineNode(PPC::LI
, dl
,
2437 0), Op
.getValue(1));
2441 Op
= SDValue(CurDAG
->getMachineNode(PPC::NOR
, dl
, MVT::i32
, Op
, Op
), 0);
2442 SDNode
*AD
= CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
2443 Op
, getI32Imm(~0U, dl
));
2444 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(AD
, 0), Op
,
2449 SDValue AD
= SDValue(CurDAG
->getMachineNode(PPC::ADDI
, dl
, MVT::i32
, Op
,
2450 getI32Imm(1, dl
)), 0);
2451 SDValue AN
= SDValue(CurDAG
->getMachineNode(PPC::AND
, dl
, MVT::i32
, AD
,
2453 SDValue Ops
[] = { AN
, getI32Imm(1, dl
), getI32Imm(31, dl
),
2454 getI32Imm(31, dl
) };
2455 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
2459 SDValue Ops
[] = { Op
, getI32Imm(1, dl
), getI32Imm(31, dl
),
2460 getI32Imm(31, dl
) };
2461 Op
= SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
2462 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Op
, getI32Imm(1, dl
));
2469 SDValue LHS
= N
->getOperand(0);
2470 SDValue RHS
= N
->getOperand(1);
2472 // Altivec Vector compare instructions do not set any CR register by default and
2473 // vector compare operations return the same type as the operands.
2474 if (LHS
.getValueType().isVector()) {
2475 if (PPCSubTarget
->hasQPX())
2478 EVT VecVT
= LHS
.getValueType();
2480 unsigned int VCmpInst
= getVCmpInst(VecVT
.getSimpleVT(), CC
,
2481 PPCSubTarget
->hasVSX(), Swap
, Negate
);
2483 std::swap(LHS
, RHS
);
2485 EVT ResVT
= VecVT
.changeVectorElementTypeToInteger();
2487 SDValue
VCmp(CurDAG
->getMachineNode(VCmpInst
, dl
, ResVT
, LHS
, RHS
), 0);
2488 CurDAG
->SelectNodeTo(N
, PPCSubTarget
->hasVSX() ? PPC::XXLNOR
: PPC::VNOR
,
2493 CurDAG
->SelectNodeTo(N
, VCmpInst
, ResVT
, LHS
, RHS
);
2497 if (PPCSubTarget
->useCRBits())
2501 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
2502 SDValue CCReg
= SelectCC(LHS
, RHS
, CC
, dl
);
2505 // Force the ccreg into CR7.
2506 SDValue CR7Reg
= CurDAG
->getRegister(PPC::CR7
, MVT::i32
);
2508 SDValue
InFlag(nullptr, 0); // Null incoming flag value.
2509 CCReg
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, CR7Reg
, CCReg
,
2510 InFlag
).getValue(1);
2512 IntCR
= SDValue(CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
, CR7Reg
,
2515 SDValue Ops
[] = { IntCR
, getI32Imm((32 - (3 - Idx
)) & 31, dl
),
2516 getI32Imm(31, dl
), getI32Imm(31, dl
) };
2518 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
2522 // Get the specified bit.
2524 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, Ops
), 0);
2525 CurDAG
->SelectNodeTo(N
, PPC::XORI
, MVT::i32
, Tmp
, getI32Imm(1, dl
));
2529 // Is this opcode a bitwise logical operation?
2530 static bool isLogicOp(unsigned Opc
) {
2531 return Opc
== ISD::AND
|| Opc
== ISD::OR
|| Opc
== ISD::XOR
;
2534 /// If this node is a sign/zero extension of an integer comparison,
2535 /// it can usually be computed in GPR's rather than using comparison
2536 /// instructions and ISEL. We only do this on 64-bit targets for now
2537 /// as the code is specialized for 64-bit (it uses 64-bit instructions
2538 /// and assumes 64-bit registers).
2539 bool PPCDAGToDAGISel::tryEXTEND(SDNode
*N
) {
2540 if (TM
.getOptLevel() == CodeGenOpt::None
|| !TM
.isPPC64())
2542 assert((N
->getOpcode() == ISD::ZERO_EXTEND
||
2543 N
->getOpcode() == ISD::SIGN_EXTEND
) &&
2544 "Expecting a zero/sign extend node!");
2547 // If we are zero-extending the result of a logical operation on i1
2548 // values, we can keep the values in GPRs.
2549 if (isLogicOp(N
->getOperand(0).getOpcode()) &&
2550 N
->getOperand(0).getValueType() == MVT::i1
&&
2551 N
->getOpcode() == ISD::ZERO_EXTEND
)
2552 WideRes
= computeLogicOpInGPR(N
->getOperand(0));
2553 else if (N
->getOperand(0).getOpcode() != ISD::SETCC
)
2557 getSETCCInGPR(N
->getOperand(0),
2558 N
->getOpcode() == ISD::SIGN_EXTEND
?
2559 SetccInGPROpts::SExtOrig
: SetccInGPROpts::ZExtOrig
);
2565 bool Input32Bit
= WideRes
.getValueType() == MVT::i32
;
2566 bool Output32Bit
= N
->getValueType(0) == MVT::i32
;
2568 NumSextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 1 : 0;
2569 NumZextSetcc
+= N
->getOpcode() == ISD::SIGN_EXTEND
? 0 : 1;
2571 SDValue ConvOp
= WideRes
;
2572 if (Input32Bit
!= Output32Bit
)
2573 ConvOp
= addExtOrTrunc(WideRes
, Input32Bit
? ExtOrTruncConversion::Ext
:
2574 ExtOrTruncConversion::Trunc
);
2575 ReplaceNode(N
, ConvOp
.getNode());
2580 // Lower a logical operation on i1 values into a GPR sequence if possible.
2581 // The result can be kept in a GPR if requested.
2582 // Three types of inputs can be handled:
2585 // - Logical operation (AND/OR/XOR)
2586 // There is also a special case that is handled (namely a complement operation
2587 // achieved with xor %a, -1).
2588 SDValue
PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp
) {
2589 assert(isLogicOp(LogicOp
.getOpcode()) &&
2590 "Can only handle logic operations here.");
2591 assert(LogicOp
.getValueType() == MVT::i1
&&
2592 "Can only handle logic operations on i1 values here.");
2596 // Special case: xor %a, -1
2597 bool IsBitwiseNegation
= isBitwiseNot(LogicOp
);
2599 // Produces a GPR sequence for each operand of the binary logic operation.
2600 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2601 // the value in a GPR and for logic operations, it will recursively produce
2602 // a GPR sequence for the operation.
2603 auto getLogicOperand
= [&] (SDValue Operand
) -> SDValue
{
2604 unsigned OperandOpcode
= Operand
.getOpcode();
2605 if (OperandOpcode
== ISD::SETCC
)
2606 return getSETCCInGPR(Operand
, SetccInGPROpts::ZExtOrig
);
2607 else if (OperandOpcode
== ISD::TRUNCATE
) {
2608 SDValue InputOp
= Operand
.getOperand(0);
2609 EVT InVT
= InputOp
.getValueType();
2611 SDValue(CurDAG
->getMachineNode(InVT
== MVT::i32
? PPC::RLDICL_32
:
2612 PPC::RLDICL
, dl
, InVT
, InputOp
,
2613 getI64Imm(0, dl
), getI64Imm(63, dl
)), 0);
2614 } else if (isLogicOp(OperandOpcode
))
2615 return computeLogicOpInGPR(Operand
);
2618 LHS
= getLogicOperand(LogicOp
.getOperand(0));
2619 RHS
= getLogicOperand(LogicOp
.getOperand(1));
2621 // If a GPR sequence can't be produced for the LHS we can't proceed.
2622 // Not producing a GPR sequence for the RHS is only a problem if this isn't
2623 // a bitwise negation operation.
2624 if (!LHS
|| (!RHS
&& !IsBitwiseNegation
))
2627 NumLogicOpsOnComparison
++;
2629 // We will use the inputs as 64-bit values.
2630 if (LHS
.getValueType() == MVT::i32
)
2631 LHS
= addExtOrTrunc(LHS
, ExtOrTruncConversion::Ext
);
2632 if (!IsBitwiseNegation
&& RHS
.getValueType() == MVT::i32
)
2633 RHS
= addExtOrTrunc(RHS
, ExtOrTruncConversion::Ext
);
2636 switch (LogicOp
.getOpcode()) {
2637 default: llvm_unreachable("Unknown logic operation.");
2638 case ISD::AND
: NewOpc
= PPC::AND8
; break;
2639 case ISD::OR
: NewOpc
= PPC::OR8
; break;
2640 case ISD::XOR
: NewOpc
= PPC::XOR8
; break;
2643 if (IsBitwiseNegation
) {
2644 RHS
= getI64Imm(1, dl
);
2645 NewOpc
= PPC::XORI8
;
2648 return SDValue(CurDAG
->getMachineNode(NewOpc
, dl
, MVT::i64
, LHS
, RHS
), 0);
2652 /// Try performing logical operations on results of comparisons in GPRs.
2653 /// It is typically preferred from a performance perspective over performing
2654 /// the operations on individual bits in the CR. We only do this on 64-bit
2655 /// targets for now as the code is specialized for 64-bit (it uses 64-bit
2656 /// instructions and assumes 64-bit registers).
2657 bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode
*N
) {
2658 if (TM
.getOptLevel() == CodeGenOpt::None
|| !TM
.isPPC64())
2660 if (N
->getValueType(0) != MVT::i1
)
2662 assert(isLogicOp(N
->getOpcode()) &&
2663 "Expected a logic operation on setcc results.");
2664 SDValue LoweredLogical
= computeLogicOpInGPR(SDValue(N
, 0));
2665 if (!LoweredLogical
)
2669 bool IsBitwiseNegate
= LoweredLogical
.getMachineOpcode() == PPC::XORI8
;
2670 unsigned SubRegToExtract
= IsBitwiseNegate
? PPC::sub_eq
: PPC::sub_gt
;
2671 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
2672 SDValue LHS
= LoweredLogical
.getOperand(0);
2673 SDValue RHS
= LoweredLogical
.getOperand(1);
2675 SDValue OpToConvToRecForm
;
2677 // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode
2678 // that is input to the XORI.
2679 if (IsBitwiseNegate
&&
2680 LoweredLogical
.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG
)
2681 OpToConvToRecForm
= LoweredLogical
.getOperand(0).getOperand(1);
2682 else if (IsBitwiseNegate
)
2683 // If the input to the XORI isn't an extension, that's what we're after.
2684 OpToConvToRecForm
= LoweredLogical
.getOperand(0);
2686 // If this is not an XORI, it is a reg-reg logical op and we can convert it
2688 OpToConvToRecForm
= LoweredLogical
;
2690 // Get the record-form version of the node we're looking to use to get the
2692 uint16_t NonRecOpc
= OpToConvToRecForm
.getMachineOpcode();
2693 int NewOpc
= PPCInstrInfo::getRecordFormOpcode(NonRecOpc
);
2695 // Convert the right node to record-form. This is either the logical we're
2696 // looking at or it is the input node to the negation (if we're looking at
2697 // a bitwise negation).
2698 if (NewOpc
!= -1 && IsBitwiseNegate
) {
2699 // The input to the XORI has a record-form. Use it.
2700 assert(LoweredLogical
.getConstantOperandVal(1) == 1 &&
2701 "Expected a PPC::XORI8 only for bitwise negation.");
2702 // Emit the record-form instruction.
2703 std::vector
<SDValue
> Ops
;
2704 for (int i
= 0, e
= OpToConvToRecForm
.getNumOperands(); i
< e
; i
++)
2705 Ops
.push_back(OpToConvToRecForm
.getOperand(i
));
2708 SDValue(CurDAG
->getMachineNode(NewOpc
, dl
,
2709 OpToConvToRecForm
.getValueType(),
2710 MVT::Glue
, Ops
), 0);
2712 assert((NewOpc
!= -1 || !IsBitwiseNegate
) &&
2713 "No record form available for AND8/OR8/XOR8?");
2715 SDValue(CurDAG
->getMachineNode(NewOpc
== -1 ? PPC::ANDIo8
: NewOpc
, dl
,
2716 MVT::i64
, MVT::Glue
, LHS
, RHS
), 0);
2719 // Select this node to a single bit from CR0 set by the record-form node
2720 // just created. For bitwise negation, use the EQ bit which is the equivalent
2721 // of negating the result (i.e. it is a bit set when the result of the
2722 // operation is zero).
2724 CurDAG
->getTargetConstant(SubRegToExtract
, dl
, MVT::i32
);
2726 SDValue(CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
, dl
,
2727 MVT::i1
, CR0Reg
, SRIdxVal
,
2728 WideOp
.getValue(1)), 0);
2729 ReplaceNode(N
, CRBit
.getNode());
2733 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2734 /// Otherwise just reinterpret it as a 64-bit value.
2735 /// Useful when emitting comparison code for 32-bit values without using
2736 /// the compare instruction (which only considers the lower 32-bits).
2737 SDValue
PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input
) {
2738 assert(Input
.getValueType() == MVT::i32
&&
2739 "Can only sign-extend 32-bit values here.");
2740 unsigned Opc
= Input
.getOpcode();
2742 // The value was sign extended and then truncated to 32-bits. No need to
2743 // sign extend it again.
2744 if (Opc
== ISD::TRUNCATE
&&
2745 (Input
.getOperand(0).getOpcode() == ISD::AssertSext
||
2746 Input
.getOperand(0).getOpcode() == ISD::SIGN_EXTEND
))
2747 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2749 LoadSDNode
*InputLoad
= dyn_cast
<LoadSDNode
>(Input
);
2750 // The input is a sign-extending load. No reason to sign-extend.
2751 if (InputLoad
&& InputLoad
->getExtensionType() == ISD::SEXTLOAD
)
2752 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2754 ConstantSDNode
*InputConst
= dyn_cast
<ConstantSDNode
>(Input
);
2755 // We don't sign-extend constants and already sign-extended values.
2756 if (InputConst
|| Opc
== ISD::AssertSext
|| Opc
== ISD::SIGN_EXTEND_INREG
||
2757 Opc
== ISD::SIGN_EXTEND
)
2758 return addExtOrTrunc(Input
, ExtOrTruncConversion::Ext
);
2761 SignExtensionsAdded
++;
2762 return SDValue(CurDAG
->getMachineNode(PPC::EXTSW_32_64
, dl
,
2763 MVT::i64
, Input
), 0);
2766 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2767 // course not actual zero/sign extensions that will generate machine code,
2768 // they're just a way to reinterpret a 32 bit value in a register as a
2769 // 64 bit value and vice-versa.
2770 SDValue
PPCDAGToDAGISel::addExtOrTrunc(SDValue NatWidthRes
,
2771 ExtOrTruncConversion Conv
) {
2772 SDLoc
dl(NatWidthRes
);
2774 // For reinterpreting 32-bit values as 64 bit values, we generate
2775 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2776 if (Conv
== ExtOrTruncConversion::Ext
) {
2777 SDValue
ImDef(CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
, MVT::i64
), 0);
2779 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
2780 return SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
, MVT::i64
,
2781 ImDef
, NatWidthRes
, SubRegIdx
), 0);
2784 assert(Conv
== ExtOrTruncConversion::Trunc
&&
2785 "Unknown convertion between 32 and 64 bit values.");
2786 // For reinterpreting 64-bit values as 32-bit values, we just need to
2787 // EXTRACT_SUBREG (i.e. extract the low word).
2789 CurDAG
->getTargetConstant(PPC::sub_32
, dl
, MVT::i32
);
2790 return SDValue(CurDAG
->getMachineNode(PPC::EXTRACT_SUBREG
, dl
, MVT::i32
,
2791 NatWidthRes
, SubRegIdx
), 0);
2794 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2795 // Handle both zero-extensions and sign-extensions.
2796 SDValue
PPCDAGToDAGISel::getCompoundZeroComparisonInGPR(SDValue LHS
, SDLoc dl
,
2797 ZeroCompare CmpTy
) {
2798 EVT InVT
= LHS
.getValueType();
2799 bool Is32Bit
= InVT
== MVT::i32
;
2802 // Produce the value that needs to be either zero or sign extended.
2804 default: llvm_unreachable("Unknown Zero-comparison type.");
2805 case ZeroCompare::GEZExt
:
2806 case ZeroCompare::GESExt
:
2807 ToExtend
= SDValue(CurDAG
->getMachineNode(Is32Bit
? PPC::NOR
: PPC::NOR8
,
2808 dl
, InVT
, LHS
, LHS
), 0);
2809 case ZeroCompare::LEZExt
:
2810 case ZeroCompare::LESExt
: {
2812 // Upper 32 bits cannot be undefined for this sequence.
2813 LHS
= signExtendInputIfNeeded(LHS
);
2815 SDValue(CurDAG
->getMachineNode(PPC::NEG8
, dl
, MVT::i64
, LHS
), 0);
2817 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2818 Neg
, getI64Imm(1, dl
),
2819 getI64Imm(63, dl
)), 0);
2822 SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, LHS
,
2823 getI64Imm(~0ULL, dl
)), 0);
2824 ToExtend
= SDValue(CurDAG
->getMachineNode(PPC::OR8
, dl
, MVT::i64
,
2830 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2831 if (!Is32Bit
&& (CmpTy
== ZeroCompare::GEZExt
|| ZeroCompare::LEZExt
))
2832 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2833 ToExtend
, getI64Imm(1, dl
),
2834 getI64Imm(63, dl
)), 0);
2835 if (!Is32Bit
&& (CmpTy
== ZeroCompare::GESExt
|| ZeroCompare::LESExt
))
2836 return SDValue(CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, ToExtend
,
2837 getI64Imm(63, dl
)), 0);
2839 assert(Is32Bit
&& "Should have handled the 32-bit sequences above.");
2840 // For 32-bit sequences, the extensions differ between GE/LE cases.
2842 default: llvm_unreachable("Unknown Zero-comparison type.");
2843 case ZeroCompare::GEZExt
: {
2844 SDValue ShiftOps
[] =
2845 { ToExtend
, getI32Imm(1, dl
), getI32Imm(31, dl
), getI32Imm(31, dl
) };
2846 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2849 case ZeroCompare::GESExt
:
2850 return SDValue(CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, ToExtend
,
2851 getI32Imm(31, dl
)), 0);
2852 case ZeroCompare::LEZExt
:
2853 return SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
, ToExtend
,
2854 getI32Imm(1, dl
)), 0);
2855 case ZeroCompare::LESExt
:
2856 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, ToExtend
,
2857 getI32Imm(-1, dl
)), 0);
2861 /// Produces a zero-extended result of comparing two 32-bit values according to
2862 /// the passed condition code.
2863 SDValue
PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS
, SDValue RHS
,
2865 int64_t RHSValue
, SDLoc dl
) {
2866 bool IsRHSZero
= RHSValue
== 0;
2868 default: return SDValue();
2870 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2871 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
2872 SDValue Xor
= IsRHSZero
? LHS
:
2873 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2875 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2876 SDValue ShiftOps
[] = { Clz
, getI32Imm(27, dl
), getI32Imm(5, dl
),
2877 getI32Imm(31, dl
) };
2878 return SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
,
2882 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2883 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
2884 SDValue Xor
= IsRHSZero
? LHS
:
2885 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2887 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2888 SDValue ShiftOps
[] = { Clz
, getI32Imm(27, dl
), getI32Imm(5, dl
),
2889 getI32Imm(31, dl
) };
2891 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
2892 return SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
2893 getI32Imm(1, dl
)), 0);
2896 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2897 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
2899 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GEZExt
);
2900 std::swap(LHS
, RHS
);
2901 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2902 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2906 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2907 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
2909 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LEZExt
);
2911 // The upper 32-bits of the register can't be undefined for this sequence.
2912 LHS
= signExtendInputIfNeeded(LHS
);
2913 RHS
= signExtendInputIfNeeded(RHS
);
2915 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, LHS
, RHS
), 0);
2917 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Sub
,
2918 getI64Imm(1, dl
), getI64Imm(63, dl
)), 0);
2920 SDValue(CurDAG
->getMachineNode(PPC::XORI8
, dl
,
2921 MVT::i64
, Shift
, getI32Imm(1, dl
)), 0);
2926 /// Produces a sign-extended result of comparing two 32-bit values according to
2927 /// the passed condition code.
///
/// \p LHS and \p RHS are the values being compared. \p RHSValue is only tested
/// against zero here: when it is zero the equality sequences use \p LHS
/// directly instead of first XOR-ing the two operands, and the ordered
/// sequences defer to getCompoundZeroComparisonInGPR. \p dl is passed to every
/// machine node that is created. The `default:` label returns an empty
/// SDValue.
/// NOTE(review): the condition-code parameter, the switch header and the case
/// labels are not visible in this listing; the per-sequence comments below
/// name the condition each sequence is written for.
2928 SDValue
PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS
, SDValue RHS
,
2930 int64_t RHSValue
, SDLoc dl
) {
2931 bool IsRHSZero
= RHSValue
== 0;
2933 default: return SDValue();
2935 // (sext (setcc %a, %b, seteq)) ->
2936 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
2937 // (sext (setcc %a, 0, seteq)) ->
2938 // (ashr (shl (ctlz %a), 58), 63)
// NOTE(review): the nodes built below are CNTLZW, RLWINM(27, 5, 31) and NEG
// rather than a literal shl/ashr pair — confirm the formula above is meant as
// an equivalent description.
2939 SDValue CountInput
= IsRHSZero
? LHS
:
2940 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2942 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, CountInput
), 0);
// Same RLWINM operands (27, 5, 31) as the setne sequence further down, which
// describes this step as a right shift by 5 bits.
2943 SDValue SHLOps
[] = { Cntlzw
, getI32Imm(27, dl
),
2944 getI32Imm(5, dl
), getI32Imm(31, dl
) };
2946 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, SHLOps
), 0);
// Negate the shifted count to form the sign-extended result.
2947 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Slwi
), 0);
2950 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
2951 // flip the bit, finally take 2's complement.
2952 // (sext (setcc %a, %b, setne)) ->
2953 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
2954 // Same as above, but the first xor is not needed.
2955 // (sext (setcc %a, 0, setne)) ->
2956 // (neg (xor (lshr (ctlz %a), 5), 1))
2957 SDValue Xor
= IsRHSZero
? LHS
:
2958 SDValue(CurDAG
->getMachineNode(PPC::XOR
, dl
, MVT::i32
, LHS
, RHS
), 0);
2960 SDValue(CurDAG
->getMachineNode(PPC::CNTLZW
, dl
, MVT::i32
, Xor
), 0);
2961 SDValue ShiftOps
[] =
2962 { Clz
, getI32Imm(27, dl
), getI32Imm(5, dl
), getI32Imm(31, dl
) };
2964 SDValue(CurDAG
->getMachineNode(PPC::RLWINM
, dl
, MVT::i32
, ShiftOps
), 0);
// XORI with 1 is the "flip the bit" step; NEG is the 2's complement step.
2966 SDValue(CurDAG
->getMachineNode(PPC::XORI
, dl
, MVT::i32
, Shift
,
2967 getI32Imm(1, dl
)), 0);
2968 return SDValue(CurDAG
->getMachineNode(PPC::NEG
, dl
, MVT::i32
, Xori
), 0);
2971 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
2972 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
// Comparison against zero is delegated to getCompoundZeroComparisonInGPR.
// NOTE(review): the guard in front of this return is not visible in this
// listing — presumably it tests IsRHSZero.
2974 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::GESExt
);
// Swap the operands and recompute IsRHSZero for the new right-hand side.
// NOTE(review): what follows the recomputation is not visible here —
// presumably control continues into the setle sequence below; confirm.
2975 std::swap(LHS
, RHS
);
2976 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
2977 IsRHSZero
= RHSConst
&& RHSConst
->isNullValue();
2981 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
2982 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
2984 return getCompoundZeroComparisonInGPR(LHS
, dl
, ZeroCompare::LESExt
);
2986 // The upper 32-bits of the register can't be undefined for this sequence.
2987 LHS
= signExtendInputIfNeeded(LHS
);
2988 RHS
= signExtendInputIfNeeded(RHS
);
// The subtraction is done as a 64-bit SUBF8 on the extended inputs; the node
// is created with an additional MVT::Glue result.
2990 SDValue(CurDAG
->getMachineNode(PPC::SUBF8
, dl
, MVT::i64
, MVT::Glue
,
// RLDICL with (1, 63) is the lshr-by-63 step of the formula above, and the
// ADDI8 of -1 is its final add.
2993 SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
,
2994 SUBFNode
, getI64Imm(1, dl
),
2995 getI64Imm(63, dl
)), 0);
2996 return SDValue(CurDAG
->getMachineNode(PPC::ADDI8
, dl
, MVT::i64
, Srdi
,
2997 getI32Imm(-1, dl
)), 0);
3002 /// Produces a zero-extended result of comparing two 64-bit values according to
3003 /// the passed condition code.
///
/// \p LHS and \p RHS are the values being compared. \p RHSValue is only tested
/// against zero: when it is zero, \p LHS is used directly in place of
/// (xor LHS, RHS). \p dl is passed to every machine node that is created. The
/// `default:` label returns an empty SDValue.
/// NOTE(review): the condition-code parameter, the switch header and the case
/// labels are not visible in this listing; the comments in front of each
/// sequence name the condition it implements (seteq, then setne).
3004 SDValue
PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS
, SDValue RHS
,
3006 int64_t RHSValue
, SDLoc dl
) {
3007 bool IsRHSZero
= RHSValue
== 0;
3009 default: return SDValue();
3011 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3012 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3013 SDValue Xor
= IsRHSZero
? LHS
:
3014 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
// Count leading zeros of the (possibly XOR-ed) input.
3016 SDValue(CurDAG
->getMachineNode(PPC::CNTLZD
, dl
, MVT::i64
, Xor
), 0);
// RLDICL with (58, 63) is the lshr-by-6 step of the formula above.
3017 return SDValue(CurDAG
->getMachineNode(PPC::RLDICL
, dl
, MVT::i64
, Clz
,
3018 getI64Imm(58, dl
), getI64Imm(63, dl
)),
3022 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3023 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3024 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3025 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
// NOTE(review): the SUBFE8 node built below takes (AC, Xor, AC's second
// result), i.e. the add result and the original input, whereas the formula
// above lists addc.reg for both register operands — confirm which of the two
// the comment should describe.
3026 SDValue Xor
= IsRHSZero
? LHS
:
3027 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
// ADDIC8 of ~0U; it is created with an extra MVT::Glue result, which is
// handed to SUBFE8 via getValue(1).
3029 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3030 Xor
, getI32Imm(~0U, dl
)), 0);
3031 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, AC
,
3032 Xor
, AC
.getValue(1)), 0);
3037 /// Produces a sign-extended result of comparing two 64-bit values according to
3038 /// the passed condition code.
///
/// \p LHS and \p RHS are the values being compared. \p RHSValue is only tested
/// against zero: when it is zero, \p LHS is used directly in place of
/// (xor LHS, RHS). \p dl is passed to every machine node that is created. The
/// `default:` label returns an empty SDValue.
/// NOTE(review): the condition-code parameter, the switch header and the case
/// labels are not visible in this listing; the comments in front of each
/// sequence name the condition it implements (seteq, then setne).
3039 SDValue
PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS
, SDValue RHS
,
3041 int64_t RHSValue
, SDLoc dl
) {
3042 bool IsRHSZero
= RHSValue
== 0;
3044 default: return SDValue();
3046 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3047 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3048 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3049 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3050 SDValue AddInput
= IsRHSZero
? LHS
:
3051 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
// ADDIC8 of ~0U is the "addcarry ..., -1" step; the node's second
// (MVT::Glue) result is consumed by SUBFE8 below via getValue(1).
3053 SDValue(CurDAG
->getMachineNode(PPC::ADDIC8
, dl
, MVT::i64
, MVT::Glue
,
3054 AddInput
, getI32Imm(~0U, dl
)), 0);
// Both register operands of SUBFE8 are the add result, matching the formula.
3055 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, Addic
,
3056 Addic
, Addic
.getValue(1)), 0);
3059 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3060 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3061 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3062 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3063 SDValue Xor
= IsRHSZero
? LHS
:
3064 SDValue(CurDAG
->getMachineNode(PPC::XOR8
, dl
, MVT::i64
, LHS
, RHS
), 0);
// SUBFIC8 with immediate 0 is the "subcarry 0, x" step; as above, its
// second (MVT::Glue) result feeds SUBFE8.
3066 SDValue(CurDAG
->getMachineNode(PPC::SUBFIC8
, dl
, MVT::i64
, MVT::Glue
,
3067 Xor
, getI32Imm(0, dl
)), 0);
3068 return SDValue(CurDAG
->getMachineNode(PPC::SUBFE8
, dl
, MVT::i64
, SC
,
3069 SC
, SC
.getValue(1)), 0);
3074 /// Does this SDValue have any uses for which keeping the value in a GPR is
3075 /// appropriate. This is meant to be used on values that have type i1 since
3076 /// it is somewhat meaningless to ask if values of other types can be kept in
/// GPRs.
///
/// \p Compare must be an ISD::SETCC node (asserted). Every use of the node is
/// inspected; a use that is not a sign extension, a zero extension, a select
/// or a logic operation (per isLogicOp) increments the
/// OmittedForNonExtendUses counter.
/// NOTE(review): the return statements are not visible in this listing —
/// presumably a single-use node and a node whose uses all qualify yield true,
/// and the first non-qualifying use yields false; confirm against the full
/// source.
3078 static bool allUsesExtend(SDValue Compare
, SelectionDAG
*CurDAG
) {
3079 assert(Compare
.getOpcode() == ISD::SETCC
&&
3080 "An ISD::SETCC node required here.");
3082 // For values that have a single use, the caller should obviously already have
3083 // checked if that use is an extending use. We check the other uses here.
3084 if (Compare
.hasOneUse())
3086 // We want the value in a GPR if it is being extended, used for a select, or
3087 // used in logical operations.
3088 for (auto CompareUse
: Compare
.getNode()->uses())
3089 if (CompareUse
->getOpcode() != ISD::SIGN_EXTEND
&&
3090 CompareUse
->getOpcode() != ISD::ZERO_EXTEND
&&
3091 CompareUse
->getOpcode() != ISD::SELECT
&&
3092 !isLogicOp(CompareUse
->getOpcode())) {
// Record that a conversion was skipped because of this kind of use.
3093 OmittedForNonExtendUses
++;
3099 /// Returns an equivalent of a SETCC node but with the result the same width as
3100 /// the inputs. This can also be used for SELECT_CC if either the true or false
3101 /// values is a power of two while the other is zero.
///
/// \p Compare must be an ISD::SETCC or ISD::SELECT_CC node (asserted).
/// \p ConvOpts selects between the zero- and sign-extending helpers and, for
/// the two *Invert options, inverts the condition code first. The work is
/// dispatched to get32BitSExtCompare / get32BitZExtCompare /
/// get64BitSExtCompare / get64BitZExtCompare depending on the input type
/// (i32 or i64) and on whether a sign-extended result was requested.
/// NOTE(review): several statements of this function (the early returns, the
/// declarations of CC and dl, and the guard in front of the 64-bit
/// sign-extending call) are not visible in this listing.
3102 SDValue
PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare
,
3103 SetccInGPROpts ConvOpts
) {
// NOTE(review): the assertion accepts SELECT_CC as well, although its message
// only mentions SETCC.
3104 assert((Compare
.getOpcode() == ISD::SETCC
||
3105 Compare
.getOpcode() == ISD::SELECT_CC
) &&
3106 "An ISD::SETCC node required here.");
3108 // Don't convert this comparison to a GPR sequence because there are uses
3109 // of the i1 result (i.e. uses that require the result in the CR).
// The use check is applied to SETCC nodes only, not to SELECT_CC.
3110 if ((Compare
.getOpcode() == ISD::SETCC
) && !allUsesExtend(Compare
, CurDAG
))
// Operands 0 and 1 are the two values being compared for both node kinds.
3113 SDValue LHS
= Compare
.getOperand(0);
3114 SDValue RHS
= Compare
.getOperand(1);
3116 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3117 int CCOpNum
= Compare
.getOpcode() == ISD::SELECT_CC
? 4 : 2;
3119 cast
<CondCodeSDNode
>(Compare
.getOperand(CCOpNum
))->get();
// Only i32 and i64 inputs are handled; the type of LHS decides.
3120 EVT InputVT
= LHS
.getValueType();
3121 if (InputVT
!= MVT::i32
&& InputVT
!= MVT::i64
)
// The *Invert options ask for the inverse condition.
3124 if (ConvOpts
== SetccInGPROpts::ZExtInvert
||
3125 ConvOpts
== SetccInGPROpts::SExtInvert
)
3126 CC
= ISD::getSetCCInverse(CC
, true);
3128 bool Inputs32Bit
= InputVT
== MVT::i32
;
// RHSValue is the sign-extended constant when RHS is a ConstantSDNode and
// INT64_MAX otherwise.
3131 ConstantSDNode
*RHSConst
= dyn_cast
<ConstantSDNode
>(RHS
);
3132 int64_t RHSValue
= RHSConst
? RHSConst
->getSExtValue() : INT64_MAX
;
3133 bool IsSext
= ConvOpts
== SetccInGPROpts::SExtOrig
||
3134 ConvOpts
== SetccInGPROpts::SExtInvert
;
// Dispatch on (sign- vs zero-extension) x (32- vs 64-bit inputs).
3136 if (IsSext
&& Inputs32Bit
)
3137 return get32BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3138 else if (Inputs32Bit
)
3139 return get32BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3141 return get64BitSExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3142 return get64BitZExtCompare(LHS
, RHS
, CC
, RHSValue
, dl
);
3145 /// Does this node represent a load/store node whose address can be represented
3146 /// with a register plus an immediate that's a multiple of \p Val:
///
/// The address is operand 1 of a load or operand 2 of a store. When it is an
/// ISD::ADD, its second operand must pass isIntS16Immediate and be a multiple
/// of \p Val; if its first operand is a frame index, the alignment of that
/// stack object is additionally tested against \p Val. For any other address
/// the result is whether the address is an ISD::CopyFromReg.
/// NOTE(review): the declarations of AddrOp and Imm, the guards that choose
/// between the load and the store assignment, and the statement executed when
/// the slot alignment test fails are not visible in this listing.
3147 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode
*N
, unsigned Val
) const {
// At most one of these is non-null, depending on the kind of node N is.
3148 LoadSDNode
*LDN
= dyn_cast
<LoadSDNode
>(N
);
3149 StoreSDNode
*STN
= dyn_cast
<StoreSDNode
>(N
);
// Address operand of a load.
3152 AddrOp
= LDN
->getOperand(1);
// Address operand of a store.
3154 AddrOp
= STN
->getOperand(2);
3157 if (AddrOp
.getOpcode() == ISD::ADD
) {
3158 // If op0 is a frame index that is under aligned, we can't do it either,
3159 // because it is translated to r31 or r1 + slot + offset. We won't know the
3160 // slot number until the stack frame is finalized.
3161 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(AddrOp
.getOperand(0))) {
3162 const MachineFrameInfo
&MFI
= CurDAG
->getMachineFunction().getFrameInfo();
3163 unsigned SlotAlign
= MFI
.getObjectAlignment(FI
->getIndex());
// Slot alignment that is not a multiple of Val counts as under-aligned.
3164 if ((SlotAlign
% Val
) != 0)
// The offset must be a signed 16-bit immediate and a multiple of Val.
3167 return isIntS16Immediate(AddrOp
.getOperand(1), Imm
) && !(Imm
% Val
);
3170 // If the address comes from the outside, the offset will be zero.
3171 return AddrOp
.getOpcode() == ISD::CopyFromReg
;
/// Attach the memory operand of \p N to the machine node \p Result.
///
/// \p N is accessed through cast<MemSDNode> and \p Result through
/// cast<MachineSDNode>, so both must be nodes of those kinds. A one-element
/// memref array is allocated from MF, filled with N's memory operand and
/// installed on \p Result with setMemRefs.
3174 void PPCDAGToDAGISel::transferMemOperands(SDNode
*N
, SDNode
*Result
) {
3175 // Transfer memoperands.
3176 MachineSDNode::mmo_iterator MemOp
= MF
->allocateMemRefsArray(1);
3177 MemOp
[0] = cast
<MemSDNode
>(N
)->getMemOperand();
// [MemOp, MemOp + 1) is the one-element range just filled in.
3178 cast
<MachineSDNode
>(Result
)->setMemRefs(MemOp
, MemOp
+ 1);
3181 // Select - Convert the specified operand from a target-independent to a
3182 // target-specific node if it hasn't already been changed.
3183 void PPCDAGToDAGISel::Select(SDNode
*N
) {
3185 if (N
->isMachineOpcode()) {
3187 return; // Already selected.
3190 // In case any misguided DAG-level optimizations form an ADD with a
3191 // TargetConstant operand, crash here instead of miscompiling (by selecting
3192 // an r+r add instead of some kind of r+i add).
3193 if (N
->getOpcode() == ISD::ADD
&&
3194 N
->getOperand(1).getOpcode() == ISD::TargetConstant
)
3195 llvm_unreachable("Invalid ADD with TargetConstant operand");
3197 // Try matching complex bit permutations before doing anything else.
3198 if (tryBitPermutation(N
))
3201 switch (N
->getOpcode()) {
3205 if (N
->getValueType(0) == MVT::i64
) {
3206 ReplaceNode(N
, selectI64Imm(CurDAG
, N
));
3211 case ISD::ZERO_EXTEND
:
3212 case ISD::SIGN_EXTEND
:
3222 case PPCISD::GlobalBaseReg
:
3223 ReplaceNode(N
, getGlobalBaseReg());
3226 case ISD::FrameIndex
:
3227 selectFrameIndex(N
, N
);
3230 case PPCISD::MFOCRF
: {
3231 SDValue InFlag
= N
->getOperand(1);
3232 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::MFOCRF
, dl
, MVT::i32
,
3233 N
->getOperand(0), InFlag
));
3237 case PPCISD::READ_TIME_BASE
:
3238 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ReadTB
, dl
, MVT::i32
, MVT::i32
,
3239 MVT::Other
, N
->getOperand(0)));
3242 case PPCISD::SRA_ADDZE
: {
3243 SDValue N0
= N
->getOperand(0);
3245 CurDAG
->getTargetConstant(*cast
<ConstantSDNode
>(N
->getOperand(1))->
3246 getConstantIntValue(), dl
,
3247 N
->getValueType(0));
3248 if (N
->getValueType(0) == MVT::i64
) {
3250 CurDAG
->getMachineNode(PPC::SRADI
, dl
, MVT::i64
, MVT::Glue
,
3252 CurDAG
->SelectNodeTo(N
, PPC::ADDZE8
, MVT::i64
, SDValue(Op
, 0),
3256 assert(N
->getValueType(0) == MVT::i32
&&
3257 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
3259 CurDAG
->getMachineNode(PPC::SRAWI
, dl
, MVT::i32
, MVT::Glue
,
3261 CurDAG
->SelectNodeTo(N
, PPC::ADDZE
, MVT::i32
, SDValue(Op
, 0),
3268 // Handle preincrement loads.
3269 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
3270 EVT LoadedVT
= LD
->getMemoryVT();
3272 // Normal loads are handled by code generated from the .td file.
3273 if (LD
->getAddressingMode() != ISD::PRE_INC
)
3276 SDValue Offset
= LD
->getOffset();
3277 if (Offset
.getOpcode() == ISD::TargetConstant
||
3278 Offset
.getOpcode() == ISD::TargetGlobalAddress
) {
3281 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
3282 if (LD
->getValueType(0) != MVT::i64
) {
3283 // Handle PPC32 integer and normal FP loads.
3284 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
3285 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
3286 default: llvm_unreachable("Invalid PPC load type!");
3287 case MVT::f64
: Opcode
= PPC::LFDU
; break;
3288 case MVT::f32
: Opcode
= PPC::LFSU
; break;
3289 case MVT::i32
: Opcode
= PPC::LWZU
; break;
3290 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU
: PPC::LHZU
; break;
3292 case MVT::i8
: Opcode
= PPC::LBZU
; break;
3295 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
3296 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
3297 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
3298 default: llvm_unreachable("Invalid PPC load type!");
3299 case MVT::i64
: Opcode
= PPC::LDU
; break;
3300 case MVT::i32
: Opcode
= PPC::LWZU8
; break;
3301 case MVT::i16
: Opcode
= isSExt
? PPC::LHAU8
: PPC::LHZU8
; break;
3303 case MVT::i8
: Opcode
= PPC::LBZU8
; break;
3307 SDValue Chain
= LD
->getChain();
3308 SDValue Base
= LD
->getBasePtr();
3309 SDValue Ops
[] = { Offset
, Base
, Chain
};
3310 SDNode
*MN
= CurDAG
->getMachineNode(
3311 Opcode
, dl
, LD
->getValueType(0),
3312 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
3313 transferMemOperands(N
, MN
);
3318 bool isSExt
= LD
->getExtensionType() == ISD::SEXTLOAD
;
3319 if (LD
->getValueType(0) != MVT::i64
) {
3320 // Handle PPC32 integer and normal FP loads.
3321 assert((!isSExt
|| LoadedVT
== MVT::i16
) && "Invalid sext update load");
3322 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
3323 default: llvm_unreachable("Invalid PPC load type!");
3324 case MVT::v4f64
: Opcode
= PPC::QVLFDUX
; break; // QPX
3325 case MVT::v4f32
: Opcode
= PPC::QVLFSUX
; break; // QPX
3326 case MVT::f64
: Opcode
= PPC::LFDUX
; break;
3327 case MVT::f32
: Opcode
= PPC::LFSUX
; break;
3328 case MVT::i32
: Opcode
= PPC::LWZUX
; break;
3329 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX
: PPC::LHZUX
; break;
3331 case MVT::i8
: Opcode
= PPC::LBZUX
; break;
3334 assert(LD
->getValueType(0) == MVT::i64
&& "Unknown load result type!");
3335 assert((!isSExt
|| LoadedVT
== MVT::i16
|| LoadedVT
== MVT::i32
) &&
3336 "Invalid sext update load");
3337 switch (LoadedVT
.getSimpleVT().SimpleTy
) {
3338 default: llvm_unreachable("Invalid PPC load type!");
3339 case MVT::i64
: Opcode
= PPC::LDUX
; break;
3340 case MVT::i32
: Opcode
= isSExt
? PPC::LWAUX
: PPC::LWZUX8
; break;
3341 case MVT::i16
: Opcode
= isSExt
? PPC::LHAUX8
: PPC::LHZUX8
; break;
3343 case MVT::i8
: Opcode
= PPC::LBZUX8
; break;
3347 SDValue Chain
= LD
->getChain();
3348 SDValue Base
= LD
->getBasePtr();
3349 SDValue Ops
[] = { Base
, Offset
, Chain
};
3350 SDNode
*MN
= CurDAG
->getMachineNode(
3351 Opcode
, dl
, LD
->getValueType(0),
3352 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()), MVT::Other
, Ops
);
3353 transferMemOperands(N
, MN
);
3360 if (tryLogicOpOfCompares(N
))
3363 unsigned Imm
, Imm2
, SH
, MB
, ME
;
3366 // If this is an and of a value rotated between 0 and 31 bits and then and'd
3367 // with a mask, emit rlwinm
3368 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
3369 isRotateAndMask(N
->getOperand(0).getNode(), Imm
, false, SH
, MB
, ME
)) {
3370 SDValue Val
= N
->getOperand(0).getOperand(0);
3371 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
3372 getI32Imm(ME
, dl
) };
3373 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
3376 // If this is just a masked value where the input is not handled above, and
3377 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
3378 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
3379 isRunOfOnes(Imm
, MB
, ME
) &&
3380 N
->getOperand(0).getOpcode() != ISD::ROTL
) {
3381 SDValue Val
= N
->getOperand(0);
3382 SDValue Ops
[] = { Val
, getI32Imm(0, dl
), getI32Imm(MB
, dl
),
3383 getI32Imm(ME
, dl
) };
3384 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
3387 // If this is a 64-bit zero-extension mask, emit rldicl.
3388 if (isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) &&
3390 SDValue Val
= N
->getOperand(0);
3391 MB
= 64 - countTrailingOnes(Imm64
);
3394 if (Val
.getOpcode() == ISD::ANY_EXTEND
) {
3395 auto Op0
= Val
.getOperand(0);
3396 if ( Op0
.getOpcode() == ISD::SRL
&&
3397 isInt32Immediate(Op0
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
3399 auto ResultType
= Val
.getNode()->getValueType(0);
3400 auto ImDef
= CurDAG
->getMachineNode(PPC::IMPLICIT_DEF
, dl
,
3402 SDValue
IDVal (ImDef
, 0);
3404 Val
= SDValue(CurDAG
->getMachineNode(PPC::INSERT_SUBREG
, dl
,
3405 ResultType
, IDVal
, Op0
.getOperand(0),
3406 getI32Imm(1, dl
)), 0);
3411 // If the operand is a logical right shift, we can fold it into this
3412 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
3413 // for n <= mb. The right shift is really a left rotate followed by a
3414 // mask, and this mask is a more-restrictive sub-mask of the mask implied
3416 if (Val
.getOpcode() == ISD::SRL
&&
3417 isInt32Immediate(Val
.getOperand(1).getNode(), Imm
) && Imm
<= MB
) {
3418 assert(Imm
< 64 && "Illegal shift amount");
3419 Val
= Val
.getOperand(0);
3423 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
) };
3424 CurDAG
->SelectNodeTo(N
, PPC::RLDICL
, MVT::i64
, Ops
);
3427 // If this is a negated 64-bit zero-extension mask,
3428 // i.e. the immediate is a sequence of ones from most significant side
3429 // and all zeros for the remainder, we should use rldicr.
3430 if (isInt64Immediate(N
->getOperand(1).getNode(), Imm64
) &&
3431 isMask_64(~Imm64
)) {
3432 SDValue Val
= N
->getOperand(0);
3433 MB
= 63 - countTrailingOnes(~Imm64
);
3435 SDValue Ops
[] = { Val
, getI32Imm(SH
, dl
), getI32Imm(MB
, dl
) };
3436 CurDAG
->SelectNodeTo(N
, PPC::RLDICR
, MVT::i64
, Ops
);
3440 // AND X, 0 -> 0, not "rlwinm 32".
3441 if (isInt32Immediate(N
->getOperand(1), Imm
) && (Imm
== 0)) {
3442 ReplaceUses(SDValue(N
, 0), N
->getOperand(1));
3445 // ISD::OR doesn't get all the bitfield insertion fun.
3446 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
3448 if (isInt32Immediate(N
->getOperand(1), Imm
) &&
3449 N
->getOperand(0).getOpcode() == ISD::OR
&&
3450 isInt32Immediate(N
->getOperand(0).getOperand(1), Imm2
)) {
3451 // The idea here is to check whether this is equivalent to:
3452 // (c1 & m) | (x & ~m)
3453 // where m is a run-of-ones mask. The logic here is that, for each bit in
3455 // - if both are 1, then the output will be 1.
3456 // - if both are 0, then the output will be 0.
3457 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
3459 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
3461 // If that last condition is never the case, then we can form m from the
3462 // bits that are the same between c1 and c2.
3464 if (isRunOfOnes(~(Imm
^Imm2
), MB
, ME
) && !(~Imm
& Imm2
)) {
3465 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
3466 N
->getOperand(0).getOperand(1),
3467 getI32Imm(0, dl
), getI32Imm(MB
, dl
),
3468 getI32Imm(ME
, dl
) };
3469 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::RLWIMI
, dl
, MVT::i32
, Ops
));
3474 // Other cases are autogenerated.
3478 if (N
->getValueType(0) == MVT::i32
)
3479 if (tryBitfieldInsert(N
))
3482 if (tryLogicOpOfCompares(N
))
3486 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
3487 isIntS16Immediate(N
->getOperand(1), Imm
)) {
3489 CurDAG
->computeKnownBits(N
->getOperand(0), LHSKnown
);
3491 // If this is equivalent to an add, then we can fold it with the
3492 // FrameIndex calculation.
3493 if ((LHSKnown
.Zero
.getZExtValue()|~(uint64_t)Imm
) == ~0ULL) {
3494 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
3499 // OR with a 32-bit immediate can be handled by ori + oris
3500 // without creating an immediate in a GPR.
3502 bool IsPPC64
= PPCSubTarget
->isPPC64();
3503 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
3504 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
3505 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
3506 uint64_t ImmHi
= Imm64
>> 16;
3507 uint64_t ImmLo
= Imm64
& 0xFFFF;
3508 if (ImmHi
!= 0 && ImmLo
!= 0) {
3509 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::ORI8
, dl
, MVT::i64
,
3511 getI16Imm(ImmLo
, dl
));
3512 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
3513 CurDAG
->SelectNodeTo(N
, PPC::ORIS8
, MVT::i64
, Ops1
);
3518 // Other cases are autogenerated.
3522 if (tryLogicOpOfCompares(N
))
3525 // XOR with a 32-bit immediate can be handled by xori + xoris
3526 // without creating an immediate in a GPR.
3528 bool IsPPC64
= PPCSubTarget
->isPPC64();
3529 if (IsPPC64
&& isInt64Immediate(N
->getOperand(1), Imm64
) &&
3530 (Imm64
& ~0xFFFFFFFFuLL
) == 0) {
3531 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
3532 uint64_t ImmHi
= Imm64
>> 16;
3533 uint64_t ImmLo
= Imm64
& 0xFFFF;
3534 if (ImmHi
!= 0 && ImmLo
!= 0) {
3535 SDNode
*Lo
= CurDAG
->getMachineNode(PPC::XORI8
, dl
, MVT::i64
,
3537 getI16Imm(ImmLo
, dl
));
3538 SDValue Ops1
[] = { SDValue(Lo
, 0), getI16Imm(ImmHi
, dl
)};
3539 CurDAG
->SelectNodeTo(N
, PPC::XORIS8
, MVT::i64
, Ops1
);
3548 if (N
->getOperand(0)->getOpcode() == ISD::FrameIndex
&&
3549 isIntS16Immediate(N
->getOperand(1), Imm
)) {
3550 selectFrameIndex(N
, N
->getOperand(0).getNode(), (int)Imm
);
3557 unsigned Imm
, SH
, MB
, ME
;
3558 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
3559 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
3560 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
3561 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
3562 getI32Imm(ME
, dl
) };
3563 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
3567 // Other cases are autogenerated.
3571 unsigned Imm
, SH
, MB
, ME
;
3572 if (isOpcWithIntImmediate(N
->getOperand(0).getNode(), ISD::AND
, Imm
) &&
3573 isRotateAndMask(N
, Imm
, true, SH
, MB
, ME
)) {
3574 SDValue Ops
[] = { N
->getOperand(0).getOperand(0),
3575 getI32Imm(SH
, dl
), getI32Imm(MB
, dl
),
3576 getI32Imm(ME
, dl
) };
3577 CurDAG
->SelectNodeTo(N
, PPC::RLWINM
, MVT::i32
, Ops
);
3581 // Other cases are autogenerated.
3584 // FIXME: Remove this once the ANDI glue bug is fixed:
3585 case PPCISD::ANDIo_1_EQ_BIT
:
3586 case PPCISD::ANDIo_1_GT_BIT
: {
3590 EVT InVT
= N
->getOperand(0).getValueType();
3591 assert((InVT
== MVT::i64
|| InVT
== MVT::i32
) &&
3592 "Invalid input type for ANDIo_1_EQ_BIT");
3594 unsigned Opcode
= (InVT
== MVT::i64
) ? PPC::ANDIo8
: PPC::ANDIo
;
3595 SDValue
AndI(CurDAG
->getMachineNode(Opcode
, dl
, InVT
, MVT::Glue
,
3597 CurDAG
->getTargetConstant(1, dl
, InVT
)),
3599 SDValue CR0Reg
= CurDAG
->getRegister(PPC::CR0
, MVT::i32
);
3601 CurDAG
->getTargetConstant(N
->getOpcode() == PPCISD::ANDIo_1_EQ_BIT
?
3602 PPC::sub_eq
: PPC::sub_gt
, dl
, MVT::i32
);
3604 CurDAG
->SelectNodeTo(N
, TargetOpcode::EXTRACT_SUBREG
, MVT::i1
, CR0Reg
,
3605 SRIdxVal
, SDValue(AndI
.getNode(), 1) /* glue */);
3608 case ISD::SELECT_CC
: {
3609 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(4))->get();
3611 CurDAG
->getTargetLoweringInfo().getPointerTy(CurDAG
->getDataLayout());
3612 bool isPPC64
= (PtrVT
== MVT::i64
);
3614 // If this is a select of i1 operands, we'll pattern match it.
3615 if (PPCSubTarget
->useCRBits() &&
3616 N
->getOperand(0).getValueType() == MVT::i1
)
3619 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
3621 if (ConstantSDNode
*N1C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1)))
3622 if (ConstantSDNode
*N2C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(2)))
3623 if (ConstantSDNode
*N3C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(3)))
3624 if (N1C
->isNullValue() && N3C
->isNullValue() &&
3625 N2C
->getZExtValue() == 1ULL && CC
== ISD::SETNE
&&
3626 // FIXME: Implement this optzn for PPC64.
3627 N
->getValueType(0) == MVT::i32
) {
3629 CurDAG
->getMachineNode(PPC::ADDIC
, dl
, MVT::i32
, MVT::Glue
,
3630 N
->getOperand(0), getI32Imm(~0U, dl
));
3631 CurDAG
->SelectNodeTo(N
, PPC::SUBFE
, MVT::i32
, SDValue(Tmp
, 0),
3632 N
->getOperand(0), SDValue(Tmp
, 1));
3636 SDValue CCReg
= SelectCC(N
->getOperand(0), N
->getOperand(1), CC
, dl
);
3638 if (N
->getValueType(0) == MVT::i1
) {
3639 // An i1 select is: (c & t) | (!c & f).
3641 unsigned Idx
= getCRIdxForSetCC(CC
, Inv
);
3645 default: llvm_unreachable("Invalid CC index");
3646 case 0: SRI
= PPC::sub_lt
; break;
3647 case 1: SRI
= PPC::sub_gt
; break;
3648 case 2: SRI
= PPC::sub_eq
; break;
3649 case 3: SRI
= PPC::sub_un
; break;
3652 SDValue CCBit
= CurDAG
->getTargetExtractSubreg(SRI
, dl
, MVT::i1
, CCReg
);
3654 SDValue
NotCCBit(CurDAG
->getMachineNode(PPC::CRNOR
, dl
, MVT::i1
,
3656 SDValue C
= Inv
? NotCCBit
: CCBit
,
3657 NotC
= Inv
? CCBit
: NotCCBit
;
3659 SDValue
CAndT(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
3660 C
, N
->getOperand(2)), 0);
3661 SDValue
NotCAndF(CurDAG
->getMachineNode(PPC::CRAND
, dl
, MVT::i1
,
3662 NotC
, N
->getOperand(3)), 0);
3664 CurDAG
->SelectNodeTo(N
, PPC::CROR
, MVT::i1
, CAndT
, NotCAndF
);
3668 unsigned BROpc
= getPredicateForSetCC(CC
);
3670 unsigned SelectCCOp
;
3671 if (N
->getValueType(0) == MVT::i32
)
3672 SelectCCOp
= PPC::SELECT_CC_I4
;
3673 else if (N
->getValueType(0) == MVT::i64
)
3674 SelectCCOp
= PPC::SELECT_CC_I8
;
3675 else if (N
->getValueType(0) == MVT::f32
)
3676 if (PPCSubTarget
->hasP8Vector())
3677 SelectCCOp
= PPC::SELECT_CC_VSSRC
;
3679 SelectCCOp
= PPC::SELECT_CC_F4
;
3680 else if (N
->getValueType(0) == MVT::f64
)
3681 if (PPCSubTarget
->hasVSX())
3682 SelectCCOp
= PPC::SELECT_CC_VSFRC
;
3684 SelectCCOp
= PPC::SELECT_CC_F8
;
3685 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4f64
)
3686 SelectCCOp
= PPC::SELECT_CC_QFRC
;
3687 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4f32
)
3688 SelectCCOp
= PPC::SELECT_CC_QSRC
;
3689 else if (PPCSubTarget
->hasQPX() && N
->getValueType(0) == MVT::v4i1
)
3690 SelectCCOp
= PPC::SELECT_CC_QBRC
;
3691 else if (N
->getValueType(0) == MVT::v2f64
||
3692 N
->getValueType(0) == MVT::v2i64
)
3693 SelectCCOp
= PPC::SELECT_CC_VSRC
;
3695 SelectCCOp
= PPC::SELECT_CC_VRRC
;
3697 SDValue Ops
[] = { CCReg
, N
->getOperand(2), N
->getOperand(3),
3698 getI32Imm(BROpc
, dl
) };
3699 CurDAG
->SelectNodeTo(N
, SelectCCOp
, N
->getValueType(0), Ops
);
3703 if (PPCSubTarget
->hasVSX()) {
3704 SDValue Ops
[] = { N
->getOperand(2), N
->getOperand(1), N
->getOperand(0) };
3705 CurDAG
->SelectNodeTo(N
, PPC::XXSEL
, N
->getValueType(0), Ops
);
3710 case ISD::VECTOR_SHUFFLE
:
3711 if (PPCSubTarget
->hasVSX() && (N
->getValueType(0) == MVT::v2f64
||
3712 N
->getValueType(0) == MVT::v2i64
)) {
3713 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(N
);
3715 SDValue Op1
= N
->getOperand(SVN
->getMaskElt(0) < 2 ? 0 : 1),
3716 Op2
= N
->getOperand(SVN
->getMaskElt(1) < 2 ? 0 : 1);
3719 for (int i
= 0; i
< 2; ++i
)
3720 if (SVN
->getMaskElt(i
) <= 0 || SVN
->getMaskElt(i
) == 2)
3725 if (Op1
== Op2
&& DM
[0] == 0 && DM
[1] == 0 &&
3726 Op1
.getOpcode() == ISD::SCALAR_TO_VECTOR
&&
3727 isa
<LoadSDNode
>(Op1
.getOperand(0))) {
3728 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op1
.getOperand(0));
3729 SDValue Base
, Offset
;
3731 if (LD
->isUnindexed() && LD
->hasOneUse() && Op1
.hasOneUse() &&
3732 (LD
->getMemoryVT() == MVT::f64
||
3733 LD
->getMemoryVT() == MVT::i64
) &&
3734 SelectAddrIdxOnly(LD
->getBasePtr(), Base
, Offset
)) {
3735 SDValue Chain
= LD
->getChain();
3736 SDValue Ops
[] = { Base
, Offset
, Chain
};
3737 MachineSDNode::mmo_iterator MemOp
= MF
->allocateMemRefsArray(1);
3738 MemOp
[0] = LD
->getMemOperand();
3739 SDNode
*NewN
= CurDAG
->SelectNodeTo(N
, PPC::LXVDSX
,
3740 N
->getValueType(0), Ops
);
3741 cast
<MachineSDNode
>(NewN
)->setMemRefs(MemOp
, MemOp
+ 1);
3746 // For little endian, we must swap the input operands and adjust
3747 // the mask elements (reverse and invert them).
3748 if (PPCSubTarget
->isLittleEndian()) {
3749 std::swap(Op1
, Op2
);
3750 unsigned tmp
= DM
[0];
3755 SDValue DMV
= CurDAG
->getTargetConstant(DM
[1] | (DM
[0] << 1), dl
,
3757 SDValue Ops
[] = { Op1
, Op2
, DMV
};
3758 CurDAG
->SelectNodeTo(N
, PPC::XXPERMDI
, N
->getValueType(0), Ops
);
3765 bool IsPPC64
= PPCSubTarget
->isPPC64();
3766 SDValue Ops
[] = { N
->getOperand(1), N
->getOperand(0) };
3767 CurDAG
->SelectNodeTo(N
, N
->getOpcode() == PPCISD::BDNZ
3768 ? (IsPPC64
? PPC::BDNZ8
: PPC::BDNZ
)
3769 : (IsPPC64
? PPC::BDZ8
: PPC::BDZ
),
3773 case PPCISD::COND_BRANCH
: {
3774 // Op #0 is the Chain.
3775 // Op #1 is the PPC::PRED_* number.
3777 // Op #3 is the Dest MBB
3778 // Op #4 is the Flag.
3779 // Prevent PPC::PRED_* from being selected into LI.
3780 unsigned PCC
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
3781 if (EnableBranchHint
)
3782 PCC
|= getBranchHint(PCC
, FuncInfo
, N
->getOperand(3));
3784 SDValue Pred
= getI32Imm(PCC
, dl
);
3785 SDValue Ops
[] = { Pred
, N
->getOperand(2), N
->getOperand(3),
3786 N
->getOperand(0), N
->getOperand(4) };
3787 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
3791 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(1))->get();
3792 unsigned PCC
= getPredicateForSetCC(CC
);
3794 if (N
->getOperand(2).getValueType() == MVT::i1
) {
3798 default: llvm_unreachable("Unexpected Boolean-operand predicate");
3799 case PPC::PRED_LT
: Opc
= PPC::CRANDC
; Swap
= true; break;
3800 case PPC::PRED_LE
: Opc
= PPC::CRORC
; Swap
= true; break;
3801 case PPC::PRED_EQ
: Opc
= PPC::CREQV
; Swap
= false; break;
3802 case PPC::PRED_GE
: Opc
= PPC::CRORC
; Swap
= false; break;
3803 case PPC::PRED_GT
: Opc
= PPC::CRANDC
; Swap
= false; break;
3804 case PPC::PRED_NE
: Opc
= PPC::CRXOR
; Swap
= false; break;
3807 SDValue
BitComp(CurDAG
->getMachineNode(Opc
, dl
, MVT::i1
,
3808 N
->getOperand(Swap
? 3 : 2),
3809 N
->getOperand(Swap
? 2 : 3)), 0);
3810 CurDAG
->SelectNodeTo(N
, PPC::BC
, MVT::Other
, BitComp
, N
->getOperand(4),
3815 if (EnableBranchHint
)
3816 PCC
|= getBranchHint(PCC
, FuncInfo
, N
->getOperand(4));
3818 SDValue CondCode
= SelectCC(N
->getOperand(2), N
->getOperand(3), CC
, dl
);
3819 SDValue Ops
[] = { getI32Imm(PCC
, dl
), CondCode
,
3820 N
->getOperand(4), N
->getOperand(0) };
3821 CurDAG
->SelectNodeTo(N
, PPC::BCC
, MVT::Other
, Ops
);
3825 // FIXME: Should custom lower this.
3826 SDValue Chain
= N
->getOperand(0);
3827 SDValue Target
= N
->getOperand(1);
3828 unsigned Opc
= Target
.getValueType() == MVT::i32
? PPC::MTCTR
: PPC::MTCTR8
;
3829 unsigned Reg
= Target
.getValueType() == MVT::i32
? PPC::BCTR
: PPC::BCTR8
;
3830 Chain
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Glue
, Target
,
3832 CurDAG
->SelectNodeTo(N
, Reg
, MVT::Other
, Chain
);
3835 case PPCISD::TOC_ENTRY
: {
3836 assert ((PPCSubTarget
->isPPC64() || PPCSubTarget
->isSVR4ABI()) &&
3837 "Only supported for 64-bit ABI and 32-bit SVR4");
3838 if (PPCSubTarget
->isSVR4ABI() && !PPCSubTarget
->isPPC64()) {
3839 SDValue GA
= N
->getOperand(0);
3840 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LWZtoc
, dl
, MVT::i32
, GA
,
3842 transferMemOperands(N
, MN
);
3847 // For medium and large code model, we generate two instructions as
3848 // described below. Otherwise we allow SelectCodeCommon to handle this,
3849 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
3850 CodeModel::Model CModel
= TM
.getCodeModel();
3851 if (CModel
!= CodeModel::Medium
&& CModel
!= CodeModel::Large
)
3854 // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
3855 // If it must be toc-referenced according to PPCSubTarget, we generate:
3856 // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
3857 // Otherwise we generate:
3858 // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
3859 SDValue GA
= N
->getOperand(0);
3860 SDValue TOCbase
= N
->getOperand(1);
3861 SDNode
*Tmp
= CurDAG
->getMachineNode(PPC::ADDIStocHA
, dl
, MVT::i64
,
3864 if (isa
<JumpTableSDNode
>(GA
) || isa
<BlockAddressSDNode
>(GA
) ||
3865 CModel
== CodeModel::Large
) {
3866 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LDtocL
, dl
, MVT::i64
, GA
,
3868 transferMemOperands(N
, MN
);
3873 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(GA
)) {
3874 const GlobalValue
*GV
= G
->getGlobal();
3875 unsigned char GVFlags
= PPCSubTarget
->classifyGlobalReference(GV
);
3876 if (GVFlags
& PPCII::MO_NLP_FLAG
) {
3877 SDNode
*MN
= CurDAG
->getMachineNode(PPC::LDtocL
, dl
, MVT::i64
, GA
,
3879 transferMemOperands(N
, MN
);
3885 ReplaceNode(N
, CurDAG
->getMachineNode(PPC::ADDItocL
, dl
, MVT::i64
,
3886 SDValue(Tmp
, 0), GA
));
3889 case PPCISD::PPC32_PICGOT
:
3890 // Generate a PIC-safe GOT reference.
3891 assert(!PPCSubTarget
->isPPC64() && PPCSubTarget
->isSVR4ABI() &&
3892 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
3893 CurDAG
->SelectNodeTo(N
, PPC::PPC32PICGOT
,
3894 PPCLowering
->getPointerTy(CurDAG
->getDataLayout()),
3898 case PPCISD::VADD_SPLAT
: {
3899 // This expands into one of three sequences, depending on whether
3900 // the first operand is odd or even, positive or negative.
3901 assert(isa
<ConstantSDNode
>(N
->getOperand(0)) &&
3902 isa
<ConstantSDNode
>(N
->getOperand(1)) &&
3903 "Invalid operand on VADD_SPLAT!");
3905 int Elt
= N
->getConstantOperandVal(0);
3906 int EltSize
= N
->getConstantOperandVal(1);
3907 unsigned Opc1
, Opc2
, Opc3
;
3911 Opc1
= PPC::VSPLTISB
;
3912 Opc2
= PPC::VADDUBM
;
3913 Opc3
= PPC::VSUBUBM
;
3915 } else if (EltSize
== 2) {
3916 Opc1
= PPC::VSPLTISH
;
3917 Opc2
= PPC::VADDUHM
;
3918 Opc3
= PPC::VSUBUHM
;
3921 assert(EltSize
== 4 && "Invalid element size on VADD_SPLAT!");
3922 Opc1
= PPC::VSPLTISW
;
3923 Opc2
= PPC::VADDUWM
;
3924 Opc3
= PPC::VSUBUWM
;
3928 if ((Elt
& 1) == 0) {
3929 // Elt is even, in the range [-32,-18] + [16,30].
3931 // Convert: VADD_SPLAT elt, size
3932 // Into: tmp = VSPLTIS[BHW] elt
3933 // VADDU[BHW]M tmp, tmp
3934 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
3935 SDValue EltVal
= getI32Imm(Elt
>> 1, dl
);
3936 SDNode
*Tmp
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
3937 SDValue TmpVal
= SDValue(Tmp
, 0);
3938 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, TmpVal
, TmpVal
));
3940 } else if (Elt
> 0) {
3941 // Elt is odd and positive, in the range [17,31].
3943 // Convert: VADD_SPLAT elt, size
3944 // Into: tmp1 = VSPLTIS[BHW] elt-16
3945 // tmp2 = VSPLTIS[BHW] -16
3946 // VSUBU[BHW]M tmp1, tmp2
3947 SDValue EltVal
= getI32Imm(Elt
- 16, dl
);
3948 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
3949 EltVal
= getI32Imm(-16, dl
);
3950 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
3951 ReplaceNode(N
, CurDAG
->getMachineNode(Opc3
, dl
, VT
, SDValue(Tmp1
, 0),
3955 // Elt is odd and negative, in the range [-31,-17].
3957 // Convert: VADD_SPLAT elt, size
3958 // Into: tmp1 = VSPLTIS[BHW] elt+16
3959 // tmp2 = VSPLTIS[BHW] -16
3960 // VADDU[BHW]M tmp1, tmp2
3961 SDValue EltVal
= getI32Imm(Elt
+ 16, dl
);
3962 SDNode
*Tmp1
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
3963 EltVal
= getI32Imm(-16, dl
);
3964 SDNode
*Tmp2
= CurDAG
->getMachineNode(Opc1
, dl
, VT
, EltVal
);
3965 ReplaceNode(N
, CurDAG
->getMachineNode(Opc2
, dl
, VT
, SDValue(Tmp1
, 0),
3975 // If the target supports the cmpb instruction, do the idiom recognition here.
3976 // We don't do this as a DAG combine because we don't want to do it as nodes
3977 // are being combined (because we might miss part of the eventual idiom). We
3978 // don't want to do it during instruction selection because we want to reuse
3979 // the logic for lowering the masking operations already part of the
3980 // instruction selector.
3981 SDValue
PPCDAGToDAGISel::combineToCMPB(SDNode
*N
) {
3984 assert(N
->getOpcode() == ISD::OR
&&
3985 "Only OR nodes are supported for CMPB");
3988 if (!PPCSubTarget
->hasCMPB())
3991 if (N
->getValueType(0) != MVT::i32
&&
3992 N
->getValueType(0) != MVT::i64
)
3995 EVT VT
= N
->getValueType(0);
3998 bool BytesFound
[8] = {false, false, false, false, false, false, false, false};
3999 uint64_t Mask
= 0, Alt
= 0;
4001 auto IsByteSelectCC
= [this](SDValue O
, unsigned &b
,
4002 uint64_t &Mask
, uint64_t &Alt
,
4003 SDValue
&LHS
, SDValue
&RHS
) {
4004 if (O
.getOpcode() != ISD::SELECT_CC
)
4006 ISD::CondCode CC
= cast
<CondCodeSDNode
>(O
.getOperand(4))->get();
4008 if (!isa
<ConstantSDNode
>(O
.getOperand(2)) ||
4009 !isa
<ConstantSDNode
>(O
.getOperand(3)))
4012 uint64_t PM
= O
.getConstantOperandVal(2);
4013 uint64_t PAlt
= O
.getConstantOperandVal(3);
4014 for (b
= 0; b
< 8; ++b
) {
4015 uint64_t Mask
= UINT64_C(0xFF) << (8*b
);
4016 if (PM
&& (PM
& Mask
) == PM
&& (PAlt
& Mask
) == PAlt
)
4025 if (!isa
<ConstantSDNode
>(O
.getOperand(1)) ||
4026 O
.getConstantOperandVal(1) != 0) {
4027 SDValue Op0
= O
.getOperand(0), Op1
= O
.getOperand(1);
4028 if (Op0
.getOpcode() == ISD::TRUNCATE
)
4029 Op0
= Op0
.getOperand(0);
4030 if (Op1
.getOpcode() == ISD::TRUNCATE
)
4031 Op1
= Op1
.getOperand(0);
4033 if (Op0
.getOpcode() == ISD::SRL
&& Op1
.getOpcode() == ISD::SRL
&&
4034 Op0
.getOperand(1) == Op1
.getOperand(1) && CC
== ISD::SETEQ
&&
4035 isa
<ConstantSDNode
>(Op0
.getOperand(1))) {
4037 unsigned Bits
= Op0
.getValueSizeInBits();
4040 if (Op0
.getConstantOperandVal(1) != Bits
-8)
4043 LHS
= Op0
.getOperand(0);
4044 RHS
= Op1
.getOperand(0);
4048 // When we have small integers (i16 to be specific), the form present
4049 // post-legalization uses SETULT in the SELECT_CC for the
4050 // higher-order byte, depending on the fact that the
4051 // even-higher-order bytes are known to all be zero, for example:
4052 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
4053 // (so when the second byte is the same, because all higher-order
4054 // bits from bytes 3 and 4 are known to be zero, the result of the
4055 // xor can be at most 255)
4056 if (Op0
.getOpcode() == ISD::XOR
&& CC
== ISD::SETULT
&&
4057 isa
<ConstantSDNode
>(O
.getOperand(1))) {
4059 uint64_t ULim
= O
.getConstantOperandVal(1);
4060 if (ULim
!= (UINT64_C(1) << b
*8))
4063 // Now we need to make sure that the upper bytes are known to be
4065 unsigned Bits
= Op0
.getValueSizeInBits();
4066 if (!CurDAG
->MaskedValueIsZero(
4067 Op0
, APInt::getHighBitsSet(Bits
, Bits
- (b
+ 1) * 8)))
4070 LHS
= Op0
.getOperand(0);
4071 RHS
= Op0
.getOperand(1);
4078 if (CC
!= ISD::SETEQ
)
4081 SDValue Op
= O
.getOperand(0);
4082 if (Op
.getOpcode() == ISD::AND
) {
4083 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
4085 if (Op
.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b
)))
4088 SDValue XOR
= Op
.getOperand(0);
4089 if (XOR
.getOpcode() == ISD::TRUNCATE
)
4090 XOR
= XOR
.getOperand(0);
4091 if (XOR
.getOpcode() != ISD::XOR
)
4094 LHS
= XOR
.getOperand(0);
4095 RHS
= XOR
.getOperand(1);
4097 } else if (Op
.getOpcode() == ISD::SRL
) {
4098 if (!isa
<ConstantSDNode
>(Op
.getOperand(1)))
4100 unsigned Bits
= Op
.getValueSizeInBits();
4103 if (Op
.getConstantOperandVal(1) != Bits
-8)
4106 SDValue XOR
= Op
.getOperand(0);
4107 if (XOR
.getOpcode() == ISD::TRUNCATE
)
4108 XOR
= XOR
.getOperand(0);
4109 if (XOR
.getOpcode() != ISD::XOR
)
4112 LHS
= XOR
.getOperand(0);
4113 RHS
= XOR
.getOperand(1);
4120 SmallVector
<SDValue
, 8> Queue(1, SDValue(N
, 0));
4121 while (!Queue
.empty()) {
4122 SDValue V
= Queue
.pop_back_val();
4124 for (const SDValue
&O
: V
.getNode()->ops()) {
4126 uint64_t M
= 0, A
= 0;
4128 if (O
.getOpcode() == ISD::OR
) {
4130 } else if (IsByteSelectCC(O
, b
, M
, A
, OLHS
, ORHS
)) {
4134 BytesFound
[b
] = true;
4137 } else if ((LHS
== ORHS
&& RHS
== OLHS
) ||
4138 (RHS
== ORHS
&& LHS
== OLHS
)) {
4139 BytesFound
[b
] = true;
4151 unsigned LastB
= 0, BCnt
= 0;
4152 for (unsigned i
= 0; i
< 8; ++i
)
4153 if (BytesFound
[LastB
]) {
4158 if (!LastB
|| BCnt
< 2)
4161 // Because we'll be zero-extending the output anyway if don't have a specific
4162 // value for each input byte (via the Mask), we can 'anyext' the inputs.
4163 if (LHS
.getValueType() != VT
) {
4164 LHS
= CurDAG
->getAnyExtOrTrunc(LHS
, dl
, VT
);
4165 RHS
= CurDAG
->getAnyExtOrTrunc(RHS
, dl
, VT
);
4168 Res
= CurDAG
->getNode(PPCISD::CMPB
, dl
, VT
, LHS
, RHS
);
4170 bool NonTrivialMask
= ((int64_t) Mask
) != INT64_C(-1);
4171 if (NonTrivialMask
&& !Alt
) {
4172 // Res = Mask & CMPB
4173 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
4174 CurDAG
->getConstant(Mask
, dl
, VT
));
4176 // Res = (CMPB & Mask) | (~CMPB & Alt)
4177 // Which, as suggested here:
4178 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
4179 // can be written as:
4180 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
4181 // useful because the (Alt ^ Mask) can be pre-computed.
4182 Res
= CurDAG
->getNode(ISD::AND
, dl
, VT
, Res
,
4183 CurDAG
->getConstant(Mask
^ Alt
, dl
, VT
));
4184 Res
= CurDAG
->getNode(ISD::XOR
, dl
, VT
, Res
,
4185 CurDAG
->getConstant(Alt
, dl
, VT
));
4191 // When CR bit registers are enabled, an extension of an i1 variable to a i32
4192 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
4193 // involves constant materialization of a 0 or a 1 or both. If the result of
4194 // the extension is then operated upon by some operator that can be constant
4195 // folded with a constant 0 or 1, and that constant can be materialized using
4196 // only one instruction (like a zero or one), then we should fold in those
4197 // operations with the select.
4198 void PPCDAGToDAGISel::foldBoolExts(SDValue
&Res
, SDNode
*&N
) {
4199 if (!PPCSubTarget
->useCRBits())
4202 if (N
->getOpcode() != ISD::ZERO_EXTEND
&&
4203 N
->getOpcode() != ISD::SIGN_EXTEND
&&
4204 N
->getOpcode() != ISD::ANY_EXTEND
)
4207 if (N
->getOperand(0).getValueType() != MVT::i1
)
4210 if (!N
->hasOneUse())
4214 EVT VT
= N
->getValueType(0);
4215 SDValue Cond
= N
->getOperand(0);
4217 CurDAG
->getConstant(N
->getOpcode() == ISD::SIGN_EXTEND
? -1 : 1, dl
, VT
);
4218 SDValue ConstFalse
= CurDAG
->getConstant(0, dl
, VT
);
4221 SDNode
*User
= *N
->use_begin();
4222 if (User
->getNumOperands() != 2)
4225 auto TryFold
= [this, N
, User
, dl
](SDValue Val
) {
4226 SDValue UserO0
= User
->getOperand(0), UserO1
= User
->getOperand(1);
4227 SDValue O0
= UserO0
.getNode() == N
? Val
: UserO0
;
4228 SDValue O1
= UserO1
.getNode() == N
? Val
: UserO1
;
4230 return CurDAG
->FoldConstantArithmetic(User
->getOpcode(), dl
,
4231 User
->getValueType(0),
4232 O0
.getNode(), O1
.getNode());
4235 // FIXME: When the semantics of the interaction between select and undef
4236 // are clearly defined, it may turn out to be unnecessary to break here.
4237 SDValue TrueRes
= TryFold(ConstTrue
);
4238 if (!TrueRes
|| TrueRes
.isUndef())
4240 SDValue FalseRes
= TryFold(ConstFalse
);
4241 if (!FalseRes
|| FalseRes
.isUndef())
4244 // For us to materialize these using one instruction, we must be able to
4245 // represent them as signed 16-bit integers.
4246 uint64_t True
= cast
<ConstantSDNode
>(TrueRes
)->getZExtValue(),
4247 False
= cast
<ConstantSDNode
>(FalseRes
)->getZExtValue();
4248 if (!isInt
<16>(True
) || !isInt
<16>(False
))
4251 // We can replace User with a new SELECT node, and try again to see if we
4252 // can fold the select with its user.
4253 Res
= CurDAG
->getSelect(dl
, User
->getValueType(0), Cond
, TrueRes
, FalseRes
);
4255 ConstTrue
= TrueRes
;
4256 ConstFalse
= FalseRes
;
4257 } while (N
->hasOneUse());
4260 void PPCDAGToDAGISel::PreprocessISelDAG() {
4261 SelectionDAG::allnodes_iterator
Position(CurDAG
->getRoot().getNode());
4264 bool MadeChange
= false;
4265 while (Position
!= CurDAG
->allnodes_begin()) {
4266 SDNode
*N
= &*--Position
;
4271 switch (N
->getOpcode()) {
4274 Res
= combineToCMPB(N
);
4279 foldBoolExts(Res
, N
);
4282 DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
4283 DEBUG(N
->dump(CurDAG
));
4284 DEBUG(dbgs() << "\nNew: ");
4285 DEBUG(Res
.getNode()->dump(CurDAG
));
4286 DEBUG(dbgs() << "\n");
4288 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), Res
);
4294 CurDAG
->RemoveDeadNodes();
4297 /// PostprocessISelDAG - Perform some late peephole optimizations
4298 /// on the DAG representation.
4299 void PPCDAGToDAGISel::PostprocessISelDAG() {
4300 // Skip peepholes at -O0.
4301 if (TM
.getOptLevel() == CodeGenOpt::None
)
4306 PeepholePPC64ZExt();
4309 // Check if all users of this node will become isel where the second operand
4310 // is the constant zero. If this is so, and if we can negate the condition,
4311 // then we can flip the true and false operands. This will allow the zero to
4312 // be folded with the isel so that we don't need to materialize a register
4314 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode
*N
) {
4315 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
4318 if (!User
->isMachineOpcode())
4320 if (User
->getMachineOpcode() != PPC::SELECT_I4
&&
4321 User
->getMachineOpcode() != PPC::SELECT_I8
)
4324 SDNode
*Op2
= User
->getOperand(2).getNode();
4325 if (!Op2
->isMachineOpcode())
4328 if (Op2
->getMachineOpcode() != PPC::LI
&&
4329 Op2
->getMachineOpcode() != PPC::LI8
)
4332 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op2
->getOperand(0));
4336 if (!C
->isNullValue())
4343 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode
*N
) {
4344 SmallVector
<SDNode
*, 4> ToReplace
;
4345 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
4348 assert((User
->getMachineOpcode() == PPC::SELECT_I4
||
4349 User
->getMachineOpcode() == PPC::SELECT_I8
) &&
4350 "Must have all select users");
4351 ToReplace
.push_back(User
);
4354 for (SmallVector
<SDNode
*, 4>::iterator UI
= ToReplace
.begin(),
4355 UE
= ToReplace
.end(); UI
!= UE
; ++UI
) {
4358 CurDAG
->getMachineNode(User
->getMachineOpcode(), SDLoc(User
),
4359 User
->getValueType(0), User
->getOperand(0),
4360 User
->getOperand(2),
4361 User
->getOperand(1));
4363 DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
4364 DEBUG(User
->dump(CurDAG
));
4365 DEBUG(dbgs() << "\nNew: ");
4366 DEBUG(ResNode
->dump(CurDAG
));
4367 DEBUG(dbgs() << "\n");
4369 ReplaceUses(User
, ResNode
);
4373 void PPCDAGToDAGISel::PeepholeCROps() {
4377 for (SDNode
&Node
: CurDAG
->allnodes()) {
4378 MachineSDNode
*MachineNode
= dyn_cast
<MachineSDNode
>(&Node
);
4379 if (!MachineNode
|| MachineNode
->use_empty())
4381 SDNode
*ResNode
= MachineNode
;
4383 bool Op1Set
= false, Op1Unset
= false,
4385 Op2Set
= false, Op2Unset
= false,
4388 unsigned Opcode
= MachineNode
->getMachineOpcode();
4399 SDValue Op
= MachineNode
->getOperand(1);
4400 if (Op
.isMachineOpcode()) {
4401 if (Op
.getMachineOpcode() == PPC::CRSET
)
4403 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
4405 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
4406 Op
.getOperand(0) == Op
.getOperand(1))
4413 case PPC::SELECT_I4
:
4414 case PPC::SELECT_I8
:
4415 case PPC::SELECT_F4
:
4416 case PPC::SELECT_F8
:
4417 case PPC::SELECT_QFRC
:
4418 case PPC::SELECT_QSRC
:
4419 case PPC::SELECT_QBRC
:
4420 case PPC::SELECT_VRRC
:
4421 case PPC::SELECT_VSFRC
:
4422 case PPC::SELECT_VSSRC
:
4423 case PPC::SELECT_VSRC
: {
4424 SDValue Op
= MachineNode
->getOperand(0);
4425 if (Op
.isMachineOpcode()) {
4426 if (Op
.getMachineOpcode() == PPC::CRSET
)
4428 else if (Op
.getMachineOpcode() == PPC::CRUNSET
)
4430 else if (Op
.getMachineOpcode() == PPC::CRNOR
&&
4431 Op
.getOperand(0) == Op
.getOperand(1))
4438 bool SelectSwap
= false;
4442 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
4444 ResNode
= MachineNode
->getOperand(0).getNode();
4447 ResNode
= MachineNode
->getOperand(1).getNode();
4450 ResNode
= MachineNode
->getOperand(0).getNode();
4451 else if (Op1Unset
|| Op2Unset
)
4452 // x & 0 = 0 & y = 0
4453 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
4456 // ~x & y = andc(y, x)
4457 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
4458 MVT::i1
, MachineNode
->getOperand(1),
4459 MachineNode
->getOperand(0).
4462 // x & ~y = andc(x, y)
4463 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
4464 MVT::i1
, MachineNode
->getOperand(0),
4465 MachineNode
->getOperand(1).
4467 else if (AllUsersSelectZero(MachineNode
)) {
4468 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
4469 MVT::i1
, MachineNode
->getOperand(0),
4470 MachineNode
->getOperand(1));
4475 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
4476 // nand(x, x) -> nor(x, x)
4477 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4478 MVT::i1
, MachineNode
->getOperand(0),
4479 MachineNode
->getOperand(0));
4481 // nand(1, y) -> nor(y, y)
4482 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4483 MVT::i1
, MachineNode
->getOperand(1),
4484 MachineNode
->getOperand(1));
4486 // nand(x, 1) -> nor(x, x)
4487 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4488 MVT::i1
, MachineNode
->getOperand(0),
4489 MachineNode
->getOperand(0));
4490 else if (Op1Unset
|| Op2Unset
)
4491 // nand(x, 0) = nand(0, y) = 1
4492 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
4495 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
4496 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
4497 MVT::i1
, MachineNode
->getOperand(0).
4499 MachineNode
->getOperand(1));
4501 // nand(x, ~y) = ~x | y = orc(y, x)
4502 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
4503 MVT::i1
, MachineNode
->getOperand(1).
4505 MachineNode
->getOperand(0));
4506 else if (AllUsersSelectZero(MachineNode
)) {
4507 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
4508 MVT::i1
, MachineNode
->getOperand(0),
4509 MachineNode
->getOperand(1));
4514 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
4516 ResNode
= MachineNode
->getOperand(0).getNode();
4517 else if (Op1Set
|| Op2Set
)
4518 // x | 1 = 1 | y = 1
4519 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
4523 ResNode
= MachineNode
->getOperand(1).getNode();
4526 ResNode
= MachineNode
->getOperand(0).getNode();
4528 // ~x | y = orc(y, x)
4529 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
4530 MVT::i1
, MachineNode
->getOperand(1),
4531 MachineNode
->getOperand(0).
4534 // x | ~y = orc(x, y)
4535 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
4536 MVT::i1
, MachineNode
->getOperand(0),
4537 MachineNode
->getOperand(1).
4539 else if (AllUsersSelectZero(MachineNode
)) {
4540 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4541 MVT::i1
, MachineNode
->getOperand(0),
4542 MachineNode
->getOperand(1));
4547 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
4549 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
4552 // xor(1, y) -> nor(y, y)
4553 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4554 MVT::i1
, MachineNode
->getOperand(1),
4555 MachineNode
->getOperand(1));
4557 // xor(x, 1) -> nor(x, x)
4558 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4559 MVT::i1
, MachineNode
->getOperand(0),
4560 MachineNode
->getOperand(0));
4563 ResNode
= MachineNode
->getOperand(1).getNode();
4566 ResNode
= MachineNode
->getOperand(0).getNode();
4568 // xor(~x, y) = eqv(x, y)
4569 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
4570 MVT::i1
, MachineNode
->getOperand(0).
4572 MachineNode
->getOperand(1));
4574 // xor(x, ~y) = eqv(x, y)
4575 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
4576 MVT::i1
, MachineNode
->getOperand(0),
4577 MachineNode
->getOperand(1).
4579 else if (AllUsersSelectZero(MachineNode
)) {
4580 ResNode
= CurDAG
->getMachineNode(PPC::CREQV
, SDLoc(MachineNode
),
4581 MVT::i1
, MachineNode
->getOperand(0),
4582 MachineNode
->getOperand(1));
4587 if (Op1Set
|| Op2Set
)
4589 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
4592 // nor(0, y) = ~y -> nor(y, y)
4593 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4594 MVT::i1
, MachineNode
->getOperand(1),
4595 MachineNode
->getOperand(1));
4598 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4599 MVT::i1
, MachineNode
->getOperand(0),
4600 MachineNode
->getOperand(0));
4602 // nor(~x, y) = andc(x, y)
4603 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
4604 MVT::i1
, MachineNode
->getOperand(0).
4606 MachineNode
->getOperand(1));
4608 // nor(x, ~y) = andc(y, x)
4609 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
4610 MVT::i1
, MachineNode
->getOperand(1).
4612 MachineNode
->getOperand(0));
4613 else if (AllUsersSelectZero(MachineNode
)) {
4614 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
4615 MVT::i1
, MachineNode
->getOperand(0),
4616 MachineNode
->getOperand(1));
4621 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
4623 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
4627 ResNode
= MachineNode
->getOperand(1).getNode();
4630 ResNode
= MachineNode
->getOperand(0).getNode();
4632 // eqv(0, y) = ~y -> nor(y, y)
4633 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4634 MVT::i1
, MachineNode
->getOperand(1),
4635 MachineNode
->getOperand(1));
4638 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4639 MVT::i1
, MachineNode
->getOperand(0),
4640 MachineNode
->getOperand(0));
4642 // eqv(~x, y) = xor(x, y)
4643 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
4644 MVT::i1
, MachineNode
->getOperand(0).
4646 MachineNode
->getOperand(1));
4648 // eqv(x, ~y) = xor(x, y)
4649 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
4650 MVT::i1
, MachineNode
->getOperand(0),
4651 MachineNode
->getOperand(1).
4653 else if (AllUsersSelectZero(MachineNode
)) {
4654 ResNode
= CurDAG
->getMachineNode(PPC::CRXOR
, SDLoc(MachineNode
),
4655 MVT::i1
, MachineNode
->getOperand(0),
4656 MachineNode
->getOperand(1));
4661 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
4663 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
4667 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4668 MVT::i1
, MachineNode
->getOperand(1),
4669 MachineNode
->getOperand(1));
4670 else if (Op1Unset
|| Op2Set
)
4671 // andc(0, y) = andc(x, 1) = 0
4672 ResNode
= CurDAG
->getMachineNode(PPC::CRUNSET
, SDLoc(MachineNode
),
4676 ResNode
= MachineNode
->getOperand(0).getNode();
4678 // andc(~x, y) = ~(x | y) = nor(x, y)
4679 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4680 MVT::i1
, MachineNode
->getOperand(0).
4682 MachineNode
->getOperand(1));
4684 // andc(x, ~y) = x & y
4685 ResNode
= CurDAG
->getMachineNode(PPC::CRAND
, SDLoc(MachineNode
),
4686 MVT::i1
, MachineNode
->getOperand(0),
4687 MachineNode
->getOperand(1).
4689 else if (AllUsersSelectZero(MachineNode
)) {
4690 ResNode
= CurDAG
->getMachineNode(PPC::CRORC
, SDLoc(MachineNode
),
4691 MVT::i1
, MachineNode
->getOperand(1),
4692 MachineNode
->getOperand(0));
4697 if (MachineNode
->getOperand(0) == MachineNode
->getOperand(1))
4699 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
4701 else if (Op1Set
|| Op2Unset
)
4702 // orc(1, y) = orc(x, 0) = 1
4703 ResNode
= CurDAG
->getMachineNode(PPC::CRSET
, SDLoc(MachineNode
),
4707 ResNode
= MachineNode
->getOperand(0).getNode();
4710 ResNode
= CurDAG
->getMachineNode(PPC::CRNOR
, SDLoc(MachineNode
),
4711 MVT::i1
, MachineNode
->getOperand(1),
4712 MachineNode
->getOperand(1));
4714 // orc(~x, y) = ~(x & y) = nand(x, y)
4715 ResNode
= CurDAG
->getMachineNode(PPC::CRNAND
, SDLoc(MachineNode
),
4716 MVT::i1
, MachineNode
->getOperand(0).
4718 MachineNode
->getOperand(1));
4720 // orc(x, ~y) = x | y
4721 ResNode
= CurDAG
->getMachineNode(PPC::CROR
, SDLoc(MachineNode
),
4722 MVT::i1
, MachineNode
->getOperand(0),
4723 MachineNode
->getOperand(1).
4725 else if (AllUsersSelectZero(MachineNode
)) {
4726 ResNode
= CurDAG
->getMachineNode(PPC::CRANDC
, SDLoc(MachineNode
),
4727 MVT::i1
, MachineNode
->getOperand(1),
4728 MachineNode
->getOperand(0));
4732 case PPC::SELECT_I4
:
4733 case PPC::SELECT_I8
:
4734 case PPC::SELECT_F4
:
4735 case PPC::SELECT_F8
:
4736 case PPC::SELECT_QFRC
:
4737 case PPC::SELECT_QSRC
:
4738 case PPC::SELECT_QBRC
:
4739 case PPC::SELECT_VRRC
:
4740 case PPC::SELECT_VSFRC
:
4741 case PPC::SELECT_VSSRC
:
4742 case PPC::SELECT_VSRC
:
4744 ResNode
= MachineNode
->getOperand(1).getNode();
4746 ResNode
= MachineNode
->getOperand(2).getNode();
4748 ResNode
= CurDAG
->getMachineNode(MachineNode
->getMachineOpcode(),
4750 MachineNode
->getValueType(0),
4751 MachineNode
->getOperand(0).
4753 MachineNode
->getOperand(2),
4754 MachineNode
->getOperand(1));
4759 ResNode
= CurDAG
->getMachineNode(Opcode
== PPC::BC
? PPC::BCn
:
4763 MachineNode
->getOperand(0).
4765 MachineNode
->getOperand(1),
4766 MachineNode
->getOperand(2));
4767 // FIXME: Handle Op1Set, Op1Unset here too.
4771 // If we're inverting this node because it is used only by selects that
4772 // we'd like to swap, then swap the selects before the node replacement.
4774 SwapAllSelectUsers(MachineNode
);
4776 if (ResNode
!= MachineNode
) {
4777 DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
4778 DEBUG(MachineNode
->dump(CurDAG
));
4779 DEBUG(dbgs() << "\nNew: ");
4780 DEBUG(ResNode
->dump(CurDAG
));
4781 DEBUG(dbgs() << "\n");
4783 ReplaceUses(MachineNode
, ResNode
);
4788 CurDAG
->RemoveDeadNodes();
4789 } while (IsModified
);
4792 // Gather the set of 32-bit operations that are known to have their
4793 // higher-order 32 bits zero, where ToPromote contains all such operations.
4794 static bool PeepholePPC64ZExtGather(SDValue Op32
,
4795 SmallPtrSetImpl
<SDNode
*> &ToPromote
) {
4796 if (!Op32
.isMachineOpcode())
4799 // First, check for the "frontier" instructions (those that will clear the
4800 // higher-order 32 bits.
4802 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
4803 // around. If it does not, then these instructions will clear the
4804 // higher-order bits.
4805 if ((Op32
.getMachineOpcode() == PPC::RLWINM
||
4806 Op32
.getMachineOpcode() == PPC::RLWNM
) &&
4807 Op32
.getConstantOperandVal(2) <= Op32
.getConstantOperandVal(3)) {
4808 ToPromote
.insert(Op32
.getNode());
4812 // SLW and SRW always clear the higher-order bits.
4813 if (Op32
.getMachineOpcode() == PPC::SLW
||
4814 Op32
.getMachineOpcode() == PPC::SRW
) {
4815 ToPromote
.insert(Op32
.getNode());
4819 // For LI and LIS, we need the immediate to be positive (so that it is not
4821 if (Op32
.getMachineOpcode() == PPC::LI
||
4822 Op32
.getMachineOpcode() == PPC::LIS
) {
4823 if (!isUInt
<15>(Op32
.getConstantOperandVal(0)))
4826 ToPromote
.insert(Op32
.getNode());
4830 // LHBRX and LWBRX always clear the higher-order bits.
4831 if (Op32
.getMachineOpcode() == PPC::LHBRX
||
4832 Op32
.getMachineOpcode() == PPC::LWBRX
) {
4833 ToPromote
.insert(Op32
.getNode());
4837 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
4838 if (Op32
.getMachineOpcode() == PPC::CNTLZW
||
4839 Op32
.getMachineOpcode() == PPC::CNTTZW
) {
4840 ToPromote
.insert(Op32
.getNode());
4844 // Next, check for those instructions we can look through.
4846 // Assuming the mask does not wrap around, then the higher-order bits are
4847 // taken directly from the first operand.
4848 if (Op32
.getMachineOpcode() == PPC::RLWIMI
&&
4849 Op32
.getConstantOperandVal(3) <= Op32
.getConstantOperandVal(4)) {
4850 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
4851 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
4854 ToPromote
.insert(Op32
.getNode());
4855 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
4859 // For OR, the higher-order bits are zero if that is true for both operands.
4860 // For SELECT_I4, the same is true (but the relevant operand numbers are
4862 if (Op32
.getMachineOpcode() == PPC::OR
||
4863 Op32
.getMachineOpcode() == PPC::SELECT_I4
) {
4864 unsigned B
= Op32
.getMachineOpcode() == PPC::SELECT_I4
? 1 : 0;
4865 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
4866 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+0), ToPromote1
))
4868 if (!PeepholePPC64ZExtGather(Op32
.getOperand(B
+1), ToPromote1
))
4871 ToPromote
.insert(Op32
.getNode());
4872 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
4876 // For ORI and ORIS, we need the higher-order bits of the first operand to be
4877 // zero, and also for the constant to be positive (so that it is not sign
4879 if (Op32
.getMachineOpcode() == PPC::ORI
||
4880 Op32
.getMachineOpcode() == PPC::ORIS
) {
4881 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
4882 if (!PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
))
4884 if (!isUInt
<15>(Op32
.getConstantOperandVal(1)))
4887 ToPromote
.insert(Op32
.getNode());
4888 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
4892 // The higher-order bits of AND are zero if that is true for at least one of
4894 if (Op32
.getMachineOpcode() == PPC::AND
) {
4895 SmallPtrSet
<SDNode
*, 16> ToPromote1
, ToPromote2
;
4897 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
4899 PeepholePPC64ZExtGather(Op32
.getOperand(1), ToPromote2
);
4900 if (!Op0OK
&& !Op1OK
)
4903 ToPromote
.insert(Op32
.getNode());
4906 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
4909 ToPromote
.insert(ToPromote2
.begin(), ToPromote2
.end());
4914 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
4915 // of the first operand, or if the second operand is positive (so that it is
4916 // not sign extended).
4917 if (Op32
.getMachineOpcode() == PPC::ANDIo
||
4918 Op32
.getMachineOpcode() == PPC::ANDISo
) {
4919 SmallPtrSet
<SDNode
*, 16> ToPromote1
;
4921 PeepholePPC64ZExtGather(Op32
.getOperand(0), ToPromote1
);
4922 bool Op1OK
= isUInt
<15>(Op32
.getConstantOperandVal(1));
4923 if (!Op0OK
&& !Op1OK
)
4926 ToPromote
.insert(Op32
.getNode());
4929 ToPromote
.insert(ToPromote1
.begin(), ToPromote1
.end());
4937 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
4938 if (!PPCSubTarget
->isPPC64())
4941 // When we zero-extend from i32 to i64, we use a pattern like this:
4942 // def : Pat<(i64 (zext i32:$in)),
4943 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
4945 // There are several 32-bit shift/rotate instructions, however, that will
4946 // clear the higher-order bits of their output, rendering the RLDICL
4947 // unnecessary. When that happens, we remove it here, and redefine the
4948 // relevant 32-bit operation to be a 64-bit operation.
4950 SelectionDAG::allnodes_iterator
Position(CurDAG
->getRoot().getNode());
4953 bool MadeChange
= false;
4954 while (Position
!= CurDAG
->allnodes_begin()) {
4955 SDNode
*N
= &*--Position
;
4956 // Skip dead nodes and any non-machine opcodes.
4957 if (N
->use_empty() || !N
->isMachineOpcode())
4960 if (N
->getMachineOpcode() != PPC::RLDICL
)
4963 if (N
->getConstantOperandVal(1) != 0 ||
4964 N
->getConstantOperandVal(2) != 32)
4967 SDValue ISR
= N
->getOperand(0);
4968 if (!ISR
.isMachineOpcode() ||
4969 ISR
.getMachineOpcode() != TargetOpcode::INSERT_SUBREG
)
4972 if (!ISR
.hasOneUse())
4975 if (ISR
.getConstantOperandVal(2) != PPC::sub_32
)
4978 SDValue IDef
= ISR
.getOperand(0);
4979 if (!IDef
.isMachineOpcode() ||
4980 IDef
.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF
)
4983 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
4984 // can get rid of it.
4986 SDValue Op32
= ISR
->getOperand(1);
4987 if (!Op32
.isMachineOpcode())
4990 // There are some 32-bit instructions that always clear the high-order 32
4991 // bits, there are also some instructions (like AND) that we can look
4993 SmallPtrSet
<SDNode
*, 16> ToPromote
;
4994 if (!PeepholePPC64ZExtGather(Op32
, ToPromote
))
4997 // If the ToPromote set contains nodes that have uses outside of the set
4998 // (except for the original INSERT_SUBREG), then abort the transformation.
4999 bool OutsideUse
= false;
5000 for (SDNode
*PN
: ToPromote
) {
5001 for (SDNode
*UN
: PN
->uses()) {
5002 if (!ToPromote
.count(UN
) && UN
!= ISR
.getNode()) {
5016 // We now know that this zero extension can be removed by promoting to
5017 // nodes in ToPromote to 64-bit operations, where for operations in the
5018 // frontier of the set, we need to insert INSERT_SUBREGs for their
5020 for (SDNode
*PN
: ToPromote
) {
5022 switch (PN
->getMachineOpcode()) {
5024 llvm_unreachable("Don't know the 64-bit variant of this instruction");
5025 case PPC::RLWINM
: NewOpcode
= PPC::RLWINM8
; break;
5026 case PPC::RLWNM
: NewOpcode
= PPC::RLWNM8
; break;
5027 case PPC::SLW
: NewOpcode
= PPC::SLW8
; break;
5028 case PPC::SRW
: NewOpcode
= PPC::SRW8
; break;
5029 case PPC::LI
: NewOpcode
= PPC::LI8
; break;
5030 case PPC::LIS
: NewOpcode
= PPC::LIS8
; break;
5031 case PPC::LHBRX
: NewOpcode
= PPC::LHBRX8
; break;
5032 case PPC::LWBRX
: NewOpcode
= PPC::LWBRX8
; break;
5033 case PPC::CNTLZW
: NewOpcode
= PPC::CNTLZW8
; break;
5034 case PPC::CNTTZW
: NewOpcode
= PPC::CNTTZW8
; break;
5035 case PPC::RLWIMI
: NewOpcode
= PPC::RLWIMI8
; break;
5036 case PPC::OR
: NewOpcode
= PPC::OR8
; break;
5037 case PPC::SELECT_I4
: NewOpcode
= PPC::SELECT_I8
; break;
5038 case PPC::ORI
: NewOpcode
= PPC::ORI8
; break;
5039 case PPC::ORIS
: NewOpcode
= PPC::ORIS8
; break;
5040 case PPC::AND
: NewOpcode
= PPC::AND8
; break;
5041 case PPC::ANDIo
: NewOpcode
= PPC::ANDIo8
; break;
5042 case PPC::ANDISo
: NewOpcode
= PPC::ANDISo8
; break;
5045 // Note: During the replacement process, the nodes will be in an
5046 // inconsistent state (some instructions will have operands with values
5047 // of the wrong type). Once done, however, everything should be right
5050 SmallVector
<SDValue
, 4> Ops
;
5051 for (const SDValue
&V
: PN
->ops()) {
5052 if (!ToPromote
.count(V
.getNode()) && V
.getValueType() == MVT::i32
&&
5053 !isa
<ConstantSDNode
>(V
)) {
5054 SDValue ReplOpOps
[] = { ISR
.getOperand(0), V
, ISR
.getOperand(2) };
5056 CurDAG
->getMachineNode(TargetOpcode::INSERT_SUBREG
, SDLoc(V
),
5057 ISR
.getNode()->getVTList(), ReplOpOps
);
5058 Ops
.push_back(SDValue(ReplOp
, 0));
5064 // Because all to-be-promoted nodes only have users that are other
5065 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
5066 // the i32 result value type with i64.
5068 SmallVector
<EVT
, 2> NewVTs
;
5069 SDVTList VTs
= PN
->getVTList();
5070 for (unsigned i
= 0, ie
= VTs
.NumVTs
; i
!= ie
; ++i
)
5071 if (VTs
.VTs
[i
] == MVT::i32
)
5072 NewVTs
.push_back(MVT::i64
);
5074 NewVTs
.push_back(VTs
.VTs
[i
]);
5076 DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
5077 DEBUG(PN
->dump(CurDAG
));
5079 CurDAG
->SelectNodeTo(PN
, NewOpcode
, CurDAG
->getVTList(NewVTs
), Ops
);
5081 DEBUG(dbgs() << "\nNew: ");
5082 DEBUG(PN
->dump(CurDAG
));
5083 DEBUG(dbgs() << "\n");
5086 // Now we replace the original zero extend and its associated INSERT_SUBREG
5087 // with the value feeding the INSERT_SUBREG (which has now been promoted to
5090 DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
5091 DEBUG(N
->dump(CurDAG
));
5092 DEBUG(dbgs() << "\nNew: ");
5093 DEBUG(Op32
.getNode()->dump(CurDAG
));
5094 DEBUG(dbgs() << "\n");
5096 ReplaceUses(N
, Op32
.getNode());
5100 CurDAG
->RemoveDeadNodes();
// PeepholePPC64 - Peephole over the selected DAG that looks for a load or
// store whose base address is produced by an add-immediate, and tries to fold
// that add-immediate into the memory operation's own offset operand, carrying
// over any relocation flags the folded immediate requires.
5103 void PPCDAGToDAGISel::PeepholePPC64() {
5104 // These optimizations are currently supported only for 64-bit SVR4.
5105 if (PPCSubTarget
->isDarwin() || !PPCSubTarget
->isPPC64())
// Walk the node list backwards: Position starts at the DAG root and is
// pre-decremented each iteration until allnodes_begin() has been visited.
5108 SelectionDAG::allnodes_iterator
Position(CurDAG
->getRoot().getNode());
5111 while (Position
!= CurDAG
->allnodes_begin()) {
5112 SDNode
*N
= &*--Position
;
5113 // Skip dead nodes and any non-machine opcodes.
5114 if (N
->use_empty() || !N
->isMachineOpcode())
// The machine opcode of the candidate memory operation; it is consulted again
// further down for the DS-form (LWA/LD/STD) alignment restrictions.
5118 unsigned StorageOpcode
= N
->getMachineOpcode();
5120 switch (StorageOpcode
) {
5151 // If this is a load or store with a zero offset, or within the alignment,
5152 // we may be able to fold an add-immediate into the memory operation.
5153 // The check against alignment is below, as it can't occur until we check
5154 // the arguments to N.
// FirstOp indexes N's immediate offset operand, which must be a constant; the
// base address operand follows it at FirstOp + 1.
5155 if (!isa
<ConstantSDNode
>(N
->getOperand(FirstOp
)))
5158 SDValue Base
= N
->getOperand(FirstOp
+ 1);
// Only an already-selected machine node can be the add-immediate we want.
5159 if (!Base
.isMachineOpcode())
// ReplaceFlags starts out true and is cleared below for the case where the
// operand already carries its relocation information.
5163 bool ReplaceFlags
= true;
5165 // When the feeding operation is an add-immediate of some sort,
5166 // determine whether we need to add relocation information to the
5167 // target flags on the immediate operand when we fold it into the
5168 // load instruction.
5170 // For something like ADDItocL, the relocation information is
5171 // inferred from the opcode; when we process it in the AsmPrinter,
5172 // we add the necessary relocation there. A load, though, can receive
5173 // relocation from various flavors of ADDIxxx, so we need to carry
5174 // the relocation information in the target flags.
5175 switch (Base
.getMachineOpcode()) {
5180 // In some cases (such as TLS) the relocation information
5181 // is already in place on the operand, so copying the operand
// as-is is enough, and no new target flags are requested (ReplaceFlags is
// cleared on the next line).
5183 ReplaceFlags
= false;
5184 // For these cases, the immediate may not be divisible by 4, in
5185 // which case the fold is illegal for DS-form instructions. (The
5186 // other cases provide aligned addresses and are always safe.)
5187 if ((StorageOpcode
== PPC::LWA
||
5188 StorageOpcode
== PPC::LD
||
5189 StorageOpcode
== PPC::STD
) &&
5190 (!isa
<ConstantSDNode
>(Base
.getOperand(1)) ||
5191 Base
.getConstantOperandVal(1) % 4 != 0))
// Each add-immediate flavor below selects the low-part relocation flag that
// the folded immediate will need.
5194 case PPC::ADDIdtprelL
:
5195 Flags
= PPCII::MO_DTPREL_LO
;
5197 case PPC::ADDItlsldL
:
5198 Flags
= PPCII::MO_TLSLD_LO
;
// NOTE(review): presumably the PPC::ADDItocL case -- confirm against the
// case label that precedes this assignment.
5201 Flags
= PPCII::MO_TOC_LO
;
// ImmOpnd is the immediate/symbol operand of the feeding add-immediate; it is
// inspected below as a global address, a constant, or a constant-pool entry.
5205 SDValue ImmOpnd
= Base
.getOperand(1);
5207 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
5208 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
5209 // we might have needed different @ha relocation values for the offset
5211 int MaxDisplacement
= 7;
// For a global, additionally cap the displacement at (alignment - 1). If
// getAlignment() returns 0 this yields -1, so every offset (including 0)
// fails the range test below and takes the restricted addi/addis path.
5212 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
5213 const GlobalValue
*GV
= GA
->getGlobal();
5214 MaxDisplacement
= std::min((int) GV
->getAlignment() - 1, MaxDisplacement
);
// HBase is operand 0 of the add-immediate, i.e. the value it adds to.
// NOTE(review): UpdateHBase presumably records that HBase's symbol addend
// must be rewritten as well -- confirm where it is set and tested.
5217 bool UpdateHBase
= false;
5218 SDValue HBase
= Base
.getOperand(0);
// The displacement currently encoded on the memory operation.
5220 int Offset
= N
->getConstantOperandVal(FirstOp
);
5222 if (Offset
< 0 || Offset
> MaxDisplacement
) {
5223 // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
5224 // one use, then we can do this for any offset, we just need to also
5225 // update the offset (i.e. the symbol addend) on the addis.
5226 if (Base
.getMachineOpcode() != PPC::ADDItocL
)
5229 if (!HBase
.isMachineOpcode() ||
5230 HBase
.getMachineOpcode() != PPC::ADDIStocHA
)
// Both halves of the pair must have exactly one use.
5233 if (!Base
.hasOneUse() || !HBase
.hasOneUse())
// The high and low parts must refer to the very same immediate operand.
5236 SDValue HImmOpnd
= HBase
.getOperand(1);
5237 if (HImmOpnd
!= ImmOpnd
)
5243 // If we're directly folding the addend from an addi instruction, then:
5244 // 1. In general, the offset on the memory access must be zero.
5245 // 2. If the addend is a constant, then it can be combined with a
5246 // non-zero offset, but only if the result meets the encoding
// requirements tested below: a multiple of 4 for LWA/LD/STD, and a value
// that fits in a signed 16-bit field.
5248 if (auto *C
= dyn_cast
<ConstantSDNode
>(ImmOpnd
)) {
5249 Offset
+= C
->getSExtValue();
5251 if ((StorageOpcode
== PPC::LWA
|| StorageOpcode
== PPC::LD
||
5252 StorageOpcode
== PPC::STD
) && (Offset
% 4) != 0)
5255 if (!isInt
<16>(Offset
))
// Materialize the combined displacement as a target constant of the same
// value type as the original immediate.
5258 ImmOpnd
= CurDAG
->getTargetConstant(Offset
, SDLoc(ImmOpnd
),
5259 ImmOpnd
.getValueType());
5260 } else if (Offset
!= 0) {
5265 // We found an opportunity. Reverse the operands from the add
5266 // immediate and substitute them into the load or store. If
5267 // needed, update the target flags for the immediate operand to
5268 // reflect the necessary relocation information.
5269 DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
5270 DEBUG(Base
->dump(CurDAG
));
5271 DEBUG(dbgs() << "\nN: ");
5272 DEBUG(N
->dump(CurDAG
));
5273 DEBUG(dbgs() << "\n");
5275 // If the relocation information isn't already present on the
5276 // immediate operand, add it now.
5278 if (GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(ImmOpnd
)) {
5280 const GlobalValue
*GV
= GA
->getGlobal();
5281 // We can't perform this optimization for data whose alignment
5282 // is insufficient for the instruction encoding.
5283 if (GV
->getAlignment() < 4 &&
5284 (StorageOpcode
== PPC::LD
|| StorageOpcode
== PPC::STD
||
5285 StorageOpcode
== PPC::LWA
|| (Offset
% 4) != 0)) {
5286 DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
// Rebuild the global as an i64 target global address that carries both the
// memory operation's Offset and the relocation Flags chosen above.
5289 ImmOpnd
= CurDAG
->getTargetGlobalAddress(GV
, dl
, MVT::i64
, Offset
, Flags
);
5290 } else if (ConstantPoolSDNode
*CP
=
5291 dyn_cast
<ConstantPoolSDNode
>(ImmOpnd
)) {
5292 const Constant
*C
= CP
->getConstVal();
5293 ImmOpnd
= CurDAG
->getTargetConstantPool(C
, MVT::i64
,
5299 if (FirstOp
== 1) // Store
5300 (void)CurDAG
->UpdateNodeOperands(N
, N
->getOperand(0), ImmOpnd
,
5301 Base
.getOperand(0), N
->getOperand(3));
// Otherwise the immediate is operand 0: substitute the new immediate and the
// add-immediate's own base, in that order.
5303 (void)CurDAG
->UpdateNodeOperands(N
, ImmOpnd
, Base
.getOperand(0),
5307 (void)CurDAG
->UpdateNodeOperands(HBase
.getNode(), HBase
.getOperand(0),
5310 // The add-immediate may now be dead, in which case remove it.
5311 if (Base
.getNode()->use_empty())
5312 CurDAG
->RemoveDeadNode(Base
.getNode());
5316 /// createPPCISelDag - This pass converts a legalized DAG into a
5317 /// PowerPC-specific DAG, ready for instruction scheduling.
///
/// \param TM       target machine, forwarded to the PPCDAGToDAGISel constructor.
/// \param OptLevel code generation optimization level, forwarded likewise.
/// \returns a newly allocated PPCDAGToDAGISel. This function neither retains
///          nor releases the object, so its lifetime is up to the caller.
5319 FunctionPass
*llvm::createPPCISelDag(PPCTargetMachine
&TM
,
5320 CodeGenOpt::Level OptLevel
) {
5321 return new PPCDAGToDAGISel(TM
, OptLevel
);