//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined, "Number of dag nodes combined");
STATISTIC(PreIndexedNodes, "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed, "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int, "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                       cl::desc("Only use DAG-combiner alias analysis in this"

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),

MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                  cl::desc("DAG combiner may split indexing from loads"));
    const TargetLowering &TLI;
    CodeGenOpt::Level OptLevel;
    bool LegalOperations = false;
    bool LegalTypes = false;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes attempted to be added to the worklist since we
    /// last considered a new worklist entry. Because we do not add duplicate
    /// nodes to the worklist, this is different from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;
    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
          recursivelyDeleteUnusedNodes(N);

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();

      // The Worklist holds the SDNodes in order, but it may contain null
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();

        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();

      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)

      ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
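      // Note: the WorklistMap insert above both deduplicates (a node that is
      // already present is not pushed again) and records the node's index in
      // Worklist, which is what lets removeFromWorklist null out the slot
      // without a linear scan.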
    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

    unsigned MaximumLegalStoreInBits;
    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      EVT VT = Op.getValueType();
      unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);
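    // Illustrative example: for a node such as (and X, 0x00FF) only the low 8
    // bits of X are demanded, so SimplifyDemandedBits may be able to replace X
    // (or the AND itself) with something simpler whose ignored high bits
    // differ, without changing the observed result.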
    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types. The semantics are as follows:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
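    // For example, a visit routine that folds (add x, 0) would return x (an
    // existing operand), while one that finds nothing to do returns SDValue().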
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitFMINIMUM(SDNode *N);
    SDValue visitFMAXIMUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// Notice that other chains may still be replaced even if the function
    bool findBetterNeighborChains(StoreSDNode *St);
    // Helper for findBetterNeighborChains. Walk up the store chain and add
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);
    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };
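    // Example: for two chained i32 stores to [base] and [base+4], the
    // corresponding MemOpLinks would record OffsetFromBase values of 0 and 4,
    // which is how MergeConsecutiveStores recognizes them as consecutive.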
    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,

    /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
    /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode *> &Loads,
                           SmallPtrSetImpl<SDNode *> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary. \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes. RootNode is a chain predecessor to all store
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. RootNode is the predecessor to all stores calculated
    /// by getStoreMergeCandidates and is used to prune the dependency check.
    /// \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return number of stores that were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);
    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      if (LegalOperations)
        return TLI.isOperationLegal(Opcode, VT);
      return TLI.isOperationLegalOrCustom(Opcode, VT);
    }
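    // Example use: a combine that wants to form a funnel shift can call
    // hasOperation(ISD::FSHL, VT) first, so the new node is only created when
    // the target can handle it at the current legalization stage.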
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  // compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());
/// Return 1 if we can compute the negated form of the specified expression for
/// the same cost as the expression itself, or 2 if we can compute the negated
/// form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;
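  // For example, negating (fneg X) is cheaper than the original (just return
  // X), hence the result of 2 above; something like (fsub A, B) can typically
  // be negated as (fsub B, A) at equal cost, yielding 1.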
  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
        TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return false;
  case ISD::ConstantFP: {
    if (!LegalOperations)

    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,

    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))

    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);

    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->NoSignedZerosFPMath &&
        !Flags.hasNoSignedZeros())

    // fold (fneg (fsub A, B)) -> (fsub B, A)

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);

    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, bool ForCodeSize,
                                    unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());

    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                       Op.getOperand(0), Flags);

    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,

    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,

    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
// APInts must be the same size for most operations, this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}
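// Example: with an 8-bit LHS, a 16-bit RHS and Offset == 1, both values are
// zero-extended to 17 bits, so a subsequent shift or add cannot overflow.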
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)

  // Don't reassociate reductions.
  if (N0->getFlags().hasVectorReduction())

  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
      AddToWorklist(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
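      // Concrete instances of the two rules above: (add (add x, 1), 2) folds
      // to (add x, 3), and (add (add x, c1), y) becomes (add (add x, y), c1)
      // when the inner add has a single use.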
// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())

  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");

  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);

  // Push the new nodes and any users onto the worklist
  for (unsigned i = 0, e = NumTo; i != e; ++i) {
    if (To[i].getNode()) {
      AddToWorklist(To[i].getNode());
      AddUsersToWorklist(To[i].getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  deleteAndRecombine(N);
  return SDValue(N, 0);
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);

  CommitTargetLoweringOpt(TLO);

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, 0, AssumeSingleUse))

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);

  CommitTargetLoweringOpt(TLO);
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());

  unsigned Opc = Op.getOpcode();
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
  case ISD::Constant: {
        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
  EVT OldVT = Op.getValueType();

  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
  AddToWorklist(NewOp.getNode());

    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();

  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
  AddToWorklist(NewOp.getNode());

    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
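/// For example, an i16 (add x, y) on such a target becomes
///   (trunc (add (anyext x), (anyext y)))
/// with the add itself performed in i32.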
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))

  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);

        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(NN0, NN1);

      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());

      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))

  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))

  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))

  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())

  SmallSetVector<SDNode *, 16> Nodes;
    N = Nodes.pop_back_val();

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
  } while (!Nodes.empty());
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddUsersToWorklist(LN);

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
  case ISD::ADD: return visitADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::UADDSAT: return visitADDSAT(N);
  case ISD::USUBSAT: return visitSUBSAT(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::UADDO: return visitADDO(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::USUBO: return visitSUBO(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::ADDCARRY: return visitADDCARRY(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::SUBCARRY: return visitSUBCARRY(N);
  case ISD::MUL: return visitMUL(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  case ISD::UREM: return visitREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::UMULO: return visitMULO(N);
  case ISD::UMAX: return visitIMINMAX(N);
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  case ISD::ROTL: return visitRotate(N);
  case ISD::FSHR: return visitFunnelShift(N);
  case ISD::ABS: return visitABS(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::BITREVERSE: return visitBITREVERSE(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP: return visitCTPOP(N);
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext: return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  case ISD::FADD: return visitFADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::FPOW: return visitFPOW(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM: return visitFMINNUM(N);
  case ISD::FMAXNUM: return visitFMAXNUM(N);
  case ISD::FMINIMUM: return visitFMINIMUM(N);
  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);
  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode *, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. The TFs grows when new token factors are
  // encountered. Limit number of nodes to inline, to avoid quadratic compile
  // times.
  for (unsigned i = 0; i < TFs.size() && Ops.size() <= 2048; ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first, stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for pruning to be worthwhile.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;

        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Do as a single replacement to avoid rewalking use lists.
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      Ops.push_back(N->getOperand(i));
    DAG.ReplaceAllUsesWith(N, Ops.data());
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
/// ConstantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}

SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift, the value and the shift amount may have different VTs.
  // For instance, on x86 the shift amount is i8 regardless of the LHS type.
  // Bail out if we have swapped operands and the value types do not match.
  // NB: x86 is fine if operands are not swapped with shift amount VT being not
  // bigger than shifted value.
  // TODO: it is possible to check for a shift operation, correct the VTs and
  // still perform the optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}

static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}

/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
  if (!C || ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || !isBitwiseNot(Not))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  SDLoc DL(N);
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}

/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
SDValue DAGCombiner::visitADDLike(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold ((A-B)+(C-A)) -> (C-B)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(0) == N1.getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N0.getOperand(1));

  // fold ((A-B)+(B-C)) -> (A-C)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(1) == N1.getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N1.getOperand(1));

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // fold (add (umax X, C), -C) --> (usubsat X, C)
  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
      return (!Max && !Op) ||
             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
    };
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
                                  /*AllowUndefs*/ true))
      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
                         N0.getOperand(1));
  }

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (isOneOrOneSplat(N1)) {
    // fold (add (xor a, -1), 1) -> (sub 0, a)
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                         N0.getOperand(0));

    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
    if (N0.getOpcode() == ISD::ADD ||
        N0.getOpcode() == ISD::UADDO ||
        N0.getOpcode() == ISD::SADDO) {
      SDValue A, Xor;

      if (isBitwiseNot(N0.getOperand(0))) {
        A = N0.getOperand(1);
        Xor = N0.getOperand(0);
      } else if (isBitwiseNot(N0.getOperand(1))) {
        A = N0.getOperand(0);
        Xor = N0.getOperand(1);
      }

      if (Xor)
        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
    }
  }

  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}

SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  return SDValue();
}

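// For illustration of the haveNoCommonBitsSet fold in visitADD above: if X is
// known to be a multiple of 16 and Y is known to be less than 16, no bit
// position can generate a carry, so X + Y and X | Y compute the same value and
// the OR form is preferred because it exposes further folds.
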
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (add_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(Opcode, DL, VT, N1, N0);
    // fold (add_sat c1, c2) -> c3
    return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // If it cannot overflow, transform into an add.
  if (Opcode == ISD::UADDSAT)
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}

static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  EVT VT = V.getNode()->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0 or
  // 1 and not other values.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}

/// Given the operands of an add/sub operation, see if the 2nd operand is a
/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
/// the opcode and bypass the mask operation.
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
                                 SelectionDAG &DAG, const SDLoc &DL) {
  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
    return SDValue();

  EVT VT = N0.getValueType();
  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
    return SDValue();

  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
}

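// Worked example for the fold above: if every bit of Y equals its sign bit
// (ComputeNumSignBits(Y) == bit width), Y is either 0 or -1, so (Y & 1) == -Y.
// Hence add N0, (and Y, 1) computes N0 - Y and the mask is bypassed entirely.
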
/// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
    return V;

  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
  // rather than 'add 0/-1' (the zext should get folded).
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}

SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

static SDValue flipBoolean(SDValue V, const SDLoc &DL,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
  EVT VT = V.getValueType();

  SDValue Cst;
  switch (TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
  case TargetLowering::UndefinedBooleanContent:
    Cst = DAG.getConstant(1, DL, VT);
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    Cst = DAG.getConstant(-1, DL, VT);
    break;
  }

  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}

static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
  if (V.getOpcode() != ISD::XOR)
    return SDValue();

  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
  if (!Const)
    return SDValue();

  EVT VT = V.getValueType();

  bool IsFlip = false;
  switch(TLI.getBooleanContents(VT)) {
    case TargetLowering::ZeroOrOneBooleanContent:
      IsFlip = Const->isOne();
      break;
    case TargetLowering::ZeroOrNegativeOneBooleanContent:
      IsFlip = Const->isAllOnesValue();
      break;
    case TargetLowering::UndefinedBooleanContent:
      IsFlip = (Const->getAPIntValue() & 0x01) == 1;
      break;
  }

  if (IsFlip)
    return V.getOperand(0);
  return SDValue();
}

SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  if (!IsSigned) {
    // If it cannot overflow, transform into an add.
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                       DAG.getConstant(0, DL, CarryVT));

    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}

SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}

SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  EVT CarryVT = CarryIn.getValueType();

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
  if (isBitwiseNot(N0) && isNullConstant(N1)) {
    if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
                                DAG.getConstant(0, DL, N0.getValueType()),
                                N0.getOperand(0), B);
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }
  }

  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}

SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   * (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     * (addcarry X, *, *)
     */
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG, bool LegalOperations) {
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}

SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  if (isNullOrNullSplat(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesOrAllOnesSplat(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (A - (0-B)) -> A+B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold (A-(B-C)) -> A+(C-B)
  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                   N1.getOperand(0)));

  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0).getOperand(1),
                                N1.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0),
                                N1.getOperand(1).getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
  }

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
    return V;

  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
  // rather than 'sub 0/1' (the sext should get folded).
  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
      N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
  }

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (sub_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub_sat x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (sub_sat x, x) -> 0
  if (N0 == N1)
    return DAG.getConstant(0, DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub_sat c1, c2) -> c3
    return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (sub_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  return SDValue();
}

SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

SDValue DAGCombiner::visitSUBO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SSUBO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (subo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (subo x, c) -> (addo x, -c)
  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // fold (subo x, 0) -> x + no borrow
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}

SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                                   DAG.getConstant(Log2Val, DL,
                                       getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    APInt MulC = ConstValue1.abs();
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
                                          : (MulC + 1).logBase2();
      assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
             "Not expecting multiply-by-constant that could have simplified");
      SDLoc DL(N);
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
                                DAG.getConstant(ShAmt, DL, VT));
      SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}

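// For illustration of the (mul (shl X, c1), c2) fold in visitMUL above:
// (X << 3) * 5 is rewritten as X * (5 << 3) = X * 40, folding the shift into
// the multiplier so later combines (e.g. the shift-and-add decomposition) see
// a single constant.
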
/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
                                     const TargetLowering &TLI) {
  RTLIB::Libcall LC;
  EVT NodeType = Node->getValueType(0);
  if (!NodeType.isSimple())
    return false;
  switch (NodeType.getSimpleVT().SimpleTy) {
  default: return false; // No libcall for vector types.
  case MVT::i8:   LC = isSigned ? RTLIB::SDIVREM_I8   : RTLIB::UDIVREM_I8;   break;
  case MVT::i16:  LC = isSigned ? RTLIB::SDIVREM_I16  : RTLIB::UDIVREM_I16;  break;
  case MVT::i32:  LC = isSigned ? RTLIB::SDIVREM_I32  : RTLIB::UDIVREM_I32;  break;
  case MVT::i64:  LC = isSigned ? RTLIB::SDIVREM_I64  : RTLIB::UDIVREM_I64;  break;
  case MVT::i128: LC = isSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break;
  }

  return TLI.getLibcallName(LC) != nullptr;
}
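// Illustrative note (editor addition, hedged): on ARM EABI targets the 32-bit
// signed divrem libcall is typically named __aeabi_idivmod; targets that do
// not provide a name make getLibcallName return null, so the combine below is
// skipped for them.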
/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined.getNode()) {
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
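// Illustrative example (editor addition): if the DAG contains both
// (sdiv x, y) and (srem x, y), the loop above rewrites both users to share a
// single ISD::SDIVREM node, taking result 0 for the quotient user and
// result 1 for the remainder user.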
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  unsigned Opc = N->getOpcode();
  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // X / undef -> undef
  // X % undef -> undef
  // X / 0 -> undef
  // X % 0 -> undef
  // NOTE: This includes vectors where any divisor element is zero/undef.
  if (DAG.isUndef(Opc, {N0, N1}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0
  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  // 0 / X -> 0
  // 0 % X -> 0
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  if (N0C && N0C->isNullValue())
    return DAG.getConstant(0, DL, VT);

  // X / X -> 1
  // X % X -> 0
  if (N0 == N1)
    return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);

  // X / 1 -> X
  // X % 1 -> 0
  // If this is a boolean op (single-bit element type), we can't have
  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
  // it's a 1.
  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
    return IsDiv ? N0 : DAG.getConstant(0, DL, VT);

  return SDValue();
}
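// Illustrative example (editor addition): for an i1 division the divisor can
// only be the single well-defined nonzero value, so "x / y" of boolean type
// folds to x and "x % y" folds to 0 via the final check above.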
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done.  Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
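// Illustrative example (editor addition): for i32 "sdiv x, 8" the generic
// sequence built above is Sign = sra x, 31; Srl = srl Sign, 29;
// Add = add x, Srl; sra Add, 3. It adds 7 only when x is negative so the
// arithmetic shift rounds toward zero; the select and negate steps fold away
// for a positive constant divisor.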
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}
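// Illustrative example (editor addition): "udiv x, 16" becomes "srl x, 4" via
// the first fold above, and "udiv x, (shl 2, y)" becomes "srl x, (add y, 1)"
// via the second.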
// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;
  // fold (urem X, -1) -> select(X == -1, 0, x)
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      // If the equivalent Div node also exists, update its users.
      unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
      if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
                                                { N0, N1 }))
        CombineTo(DivNode, OptimizedDiv);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
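// Illustrative example (editor addition): when division by a constant is not
// cheap on the target, "x % 7" is rewritten here as x - (x / 7) * 7, reusing
// whatever expansion visitSDIVLike/visitUDIVLike produced for the speculative
// divide.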
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N0;
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N0;
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
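// Illustrative example (editor addition): with i16 operands and a legal i32
// multiply, the widening path above computes "mulhu a, b" as
// trunc((i32)(zext a * zext b) >> 16).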
/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Return true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitMULO(SDNode *N) {
  bool IsSigned = (ISD::SMULO == N->getOpcode());

  // (mulo x, 2) -> (addo x, x)
  if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
                         N->getVTList(), N->getOperand(0), N->getOperand(0));

  return SDValue();
}
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  // If sign bits are known zero, flip between UMIN/UMAX and SMIN/SMAX.
  // Only do this if the current op isn't legal and the flipped is.
  unsigned Opcode = N->getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    unsigned AltOpcode;
    switch (Opcode) {
    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(AltOpcode, VT))
      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
  }

  return SDValue();
}
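// Illustrative example (editor addition): if only SMIN is legal and both
// inputs of a UMIN are known non-negative, the node is rewritten as SMIN (and
// vice versa), since signed and unsigned orderings agree when sign bits are 0.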
4016 /// If this is a bitwise logic instruction and both operands have the same
4017 /// opcode, try to sink the other opcode after the logic instruction.
4018 SDValue
DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode
*N
) {
4019 SDValue N0
= N
->getOperand(0), N1
= N
->getOperand(1);
4020 EVT VT
= N0
.getValueType();
4021 unsigned LogicOpcode
= N
->getOpcode();
4022 unsigned HandOpcode
= N0
.getOpcode();
4023 assert((LogicOpcode
== ISD::AND
|| LogicOpcode
== ISD::OR
||
4024 LogicOpcode
== ISD::XOR
) && "Expected logic opcode");
4025 assert(HandOpcode
== N1
.getOpcode() && "Bad input!");
4027 // Bail early if none of these transforms apply.
4028 if (N0
.getNumOperands() == 0)
4031 // FIXME: We should check number of uses of the operands to not increase
4032 // the instruction count for all transforms.
4034 // Handle size-changing casts.
4035 SDValue X
= N0
.getOperand(0);
4036 SDValue Y
= N1
.getOperand(0);
4037 EVT XVT
= X
.getValueType();
4039 if (HandOpcode
== ISD::ANY_EXTEND
|| HandOpcode
== ISD::ZERO_EXTEND
||
4040 HandOpcode
== ISD::SIGN_EXTEND
) {
4041 // If both operands have other uses, this transform would create extra
4042 // instructions without eliminating anything.
4043 if (!N0
.hasOneUse() && !N1
.hasOneUse())
4045 // We need matching integer source types.
4046 if (XVT
!= Y
.getValueType())
4048 // Don't create an illegal op during or after legalization. Don't ever
4049 // create an unsupported vector op.
4050 if ((VT
.isVector() || LegalOperations
) &&
4051 !TLI
.isOperationLegalOrCustom(LogicOpcode
, XVT
))
4053 // Avoid infinite looping with PromoteIntBinOp.
4054 // TODO: Should we apply desirable/legal constraints to all opcodes?
4055 if (HandOpcode
== ISD::ANY_EXTEND
&& LegalTypes
&&
4056 !TLI
.isTypeDesirableForOp(LogicOpcode
, XVT
))
4058 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4059 SDValue Logic
= DAG
.getNode(LogicOpcode
, DL
, XVT
, X
, Y
);
4060 return DAG
.getNode(HandOpcode
, DL
, VT
, Logic
);
4063 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4064 if (HandOpcode
== ISD::TRUNCATE
) {
4065 // If both operands have other uses, this transform would create extra
4066 // instructions without eliminating anything.
4067 if (!N0
.hasOneUse() && !N1
.hasOneUse())
4069 // We need matching source types.
4070 if (XVT
!= Y
.getValueType())
4072 // Don't create an illegal op during or after legalization.
4073 if (LegalOperations
&& !TLI
.isOperationLegal(LogicOpcode
, XVT
))
4075 // Be extra careful sinking truncate. If it's free, there's no benefit in
4076 // widening a binop. Also, don't create a logic op on an illegal type.
4077 if (TLI
.isZExtFree(VT
, XVT
) && TLI
.isTruncateFree(XVT
, VT
))
4079 if (!TLI
.isTypeLegal(XVT
))
4081 SDValue Logic
= DAG
.getNode(LogicOpcode
, DL
, XVT
, X
, Y
);
4082 return DAG
.getNode(HandOpcode
, DL
, VT
, Logic
);
4085 // For binops SHL/SRL/SRA/AND:
4086 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4087 if ((HandOpcode
== ISD::SHL
|| HandOpcode
== ISD::SRL
||
4088 HandOpcode
== ISD::SRA
|| HandOpcode
== ISD::AND
) &&
4089 N0
.getOperand(1) == N1
.getOperand(1)) {
4090 // If either operand has other uses, this transform is not an improvement.
4091 if (!N0
.hasOneUse() || !N1
.hasOneUse())
4093 SDValue Logic
= DAG
.getNode(LogicOpcode
, DL
, XVT
, X
, Y
);
4094 return DAG
.getNode(HandOpcode
, DL
, VT
, Logic
, N0
.getOperand(1));
4097 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4098 if (HandOpcode
== ISD::BSWAP
) {
4099 // If either operand has other uses, this transform is not an improvement.
4100 if (!N0
.hasOneUse() || !N1
.hasOneUse())
4102 SDValue Logic
= DAG
.getNode(LogicOpcode
, DL
, XVT
, X
, Y
);
4103 return DAG
.getNode(HandOpcode
, DL
, VT
, Logic
);
4106 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4107 // Only perform this optimization up until type legalization, before
// LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4109 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4110 // we don't want to undo this promotion.
4111 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4113 if ((HandOpcode
== ISD::BITCAST
|| HandOpcode
== ISD::SCALAR_TO_VECTOR
) &&
4114 Level
<= AfterLegalizeTypes
) {
4115 // Input types must be integer and the same.
4116 if (XVT
.isInteger() && XVT
== Y
.getValueType()) {
4117 SDValue Logic
= DAG
.getNode(LogicOpcode
, DL
, XVT
, X
, Y
);
4118 return DAG
.getNode(HandOpcode
, DL
, VT
, Logic
);
4122 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4123 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4124 // If both shuffles use the same mask, and both shuffle within a single
4125 // vector, then it is worthwhile to move the swizzle after the operation.
4126 // The type-legalizer generates this pattern when loading illegal
4127 // vector types from memory. In many cases this allows additional shuffle
4129 // There are other cases where moving the shuffle after the xor/and/or
4130 // is profitable even if shuffles don't perform a swizzle.
4131 // If both shuffles use the same mask, and both shuffles have the same first
4132 // or second operand, then it might still be profitable to move the shuffle
4133 // after the xor/and/or operation.
4134 if (HandOpcode
== ISD::VECTOR_SHUFFLE
&& Level
< AfterLegalizeDAG
) {
4135 auto *SVN0
= cast
<ShuffleVectorSDNode
>(N0
);
4136 auto *SVN1
= cast
<ShuffleVectorSDNode
>(N1
);
4137 assert(X
.getValueType() == Y
.getValueType() &&
4138 "Inputs to shuffles are not the same type");
4140 // Check that both shuffles use the same mask. The masks are known to be of
4141 // the same length because the result vector type is the same.
4142 // Check also that shuffles have only one use to avoid introducing extra
4144 if (!SVN0
->hasOneUse() || !SVN1
->hasOneUse() ||
4145 !SVN0
->getMask().equals(SVN1
->getMask()))
4148 // Don't try to fold this node if it requires introducing a
4149 // build vector of all zeros that might be illegal at this stage.
4150 SDValue ShOp
= N0
.getOperand(1);
4151 if (LogicOpcode
== ISD::XOR
&& !ShOp
.isUndef())
4152 ShOp
= tryFoldToZero(DL
, TLI
, VT
, DAG
, LegalOperations
);
4154 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4155 if (N0
.getOperand(1) == N1
.getOperand(1) && ShOp
.getNode()) {
4156 SDValue Logic
= DAG
.getNode(LogicOpcode
, DL
, VT
,
4157 N0
.getOperand(0), N1
.getOperand(0));
4158 return DAG
.getVectorShuffle(VT
, DL
, Logic
, ShOp
, SVN0
->getMask());
4161 // Don't try to fold this node if it requires introducing a
4162 // build vector of all zeros that might be illegal at this stage.
4163 ShOp
= N0
.getOperand(0);
4164 if (LogicOpcode
== ISD::XOR
&& !ShOp
.isUndef())
4165 ShOp
= tryFoldToZero(DL
, TLI
, VT
, DAG
, LegalOperations
);
4167 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4168 if (N0
.getOperand(0) == N1
.getOperand(0) && ShOp
.getNode()) {
4169 SDValue Logic
= DAG
.getNode(LogicOpcode
, DL
, VT
, N0
.getOperand(1),
4171 return DAG
.getVectorShuffle(VT
, DL
, ShOp
, Logic
, SVN0
->getMask());
4178 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4179 SDValue
DAGCombiner::foldLogicOfSetCCs(bool IsAnd
, SDValue N0
, SDValue N1
,
4181 SDValue LL
, LR
, RL
, RR
, N0CC
, N1CC
;
4182 if (!isSetCCEquivalent(N0
, LL
, LR
, N0CC
) ||
4183 !isSetCCEquivalent(N1
, RL
, RR
, N1CC
))
4186 assert(N0
.getValueType() == N1
.getValueType() &&
4187 "Unexpected operand types for bitwise logic op");
4188 assert(LL
.getValueType() == LR
.getValueType() &&
4189 RL
.getValueType() == RR
.getValueType() &&
4190 "Unexpected operand types for setcc");
4192 // If we're here post-legalization or the logic op type is not i1, the logic
4193 // op type must match a setcc result type. Also, all folds require new
4194 // operations on the left and right operands, so those types must match.
4195 EVT VT
= N0
.getValueType();
4196 EVT OpVT
= LL
.getValueType();
4197 if (LegalOperations
|| VT
.getScalarType() != MVT::i1
)
4198 if (VT
!= getSetCCResultType(OpVT
))
4200 if (OpVT
!= RL
.getValueType())
4203 ISD::CondCode CC0
= cast
<CondCodeSDNode
>(N0CC
)->get();
4204 ISD::CondCode CC1
= cast
<CondCodeSDNode
>(N1CC
)->get();
4205 bool IsInteger
= OpVT
.isInteger();
4206 if (LR
== RR
&& CC0
== CC1
&& IsInteger
) {
4207 bool IsZero
= isNullOrNullSplat(LR
);
4208 bool IsNeg1
= isAllOnesOrAllOnesSplat(LR
);
4211 bool AndEqZero
= IsAnd
&& CC1
== ISD::SETEQ
&& IsZero
;
4212 // All sign bits clear?
4213 bool AndGtNeg1
= IsAnd
&& CC1
== ISD::SETGT
&& IsNeg1
;
4215 bool OrNeZero
= !IsAnd
&& CC1
== ISD::SETNE
&& IsZero
;
4216 // Any sign bits set?
4217 bool OrLtZero
= !IsAnd
&& CC1
== ISD::SETLT
&& IsZero
;
4219 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4220 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4221 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4222 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4223 if (AndEqZero
|| AndGtNeg1
|| OrNeZero
|| OrLtZero
) {
4224 SDValue Or
= DAG
.getNode(ISD::OR
, SDLoc(N0
), OpVT
, LL
, RL
);
4225 AddToWorklist(Or
.getNode());
4226 return DAG
.getSetCC(DL
, VT
, Or
, LR
, CC1
);
4230 bool AndEqNeg1
= IsAnd
&& CC1
== ISD::SETEQ
&& IsNeg1
;
4231 // All sign bits set?
4232 bool AndLtZero
= IsAnd
&& CC1
== ISD::SETLT
&& IsZero
;
4234 bool OrNeNeg1
= !IsAnd
&& CC1
== ISD::SETNE
&& IsNeg1
;
4235 // Any sign bits clear?
4236 bool OrGtNeg1
= !IsAnd
&& CC1
== ISD::SETGT
&& IsNeg1
;
4238 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4239 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4240 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4241 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4242 if (AndEqNeg1
|| AndLtZero
|| OrNeNeg1
|| OrGtNeg1
) {
4243 SDValue And
= DAG
.getNode(ISD::AND
, SDLoc(N0
), OpVT
, LL
, RL
);
4244 AddToWorklist(And
.getNode());
4245 return DAG
.getSetCC(DL
, VT
, And
, LR
, CC1
);
4249 // TODO: What is the 'or' equivalent of this fold?
4250 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4251 if (IsAnd
&& LL
== RL
&& CC0
== CC1
&& OpVT
.getScalarSizeInBits() > 1 &&
4252 IsInteger
&& CC0
== ISD::SETNE
&&
4253 ((isNullConstant(LR
) && isAllOnesConstant(RR
)) ||
4254 (isAllOnesConstant(LR
) && isNullConstant(RR
)))) {
4255 SDValue One
= DAG
.getConstant(1, DL
, OpVT
);
4256 SDValue Two
= DAG
.getConstant(2, DL
, OpVT
);
4257 SDValue Add
= DAG
.getNode(ISD::ADD
, SDLoc(N0
), OpVT
, LL
, One
);
4258 AddToWorklist(Add
.getNode());
4259 return DAG
.getSetCC(DL
, VT
, Add
, Two
, ISD::SETUGE
);
4262 // Try more general transforms if the predicates match and the only user of
4263 // the compares is the 'and' or 'or'.
4264 if (IsInteger
&& TLI
.convertSetCCLogicToBitwiseLogic(OpVT
) && CC0
== CC1
&&
4265 N0
.hasOneUse() && N1
.hasOneUse()) {
4266 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4267 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4268 if ((IsAnd
&& CC1
== ISD::SETEQ
) || (!IsAnd
&& CC1
== ISD::SETNE
)) {
4269 SDValue XorL
= DAG
.getNode(ISD::XOR
, SDLoc(N0
), OpVT
, LL
, LR
);
4270 SDValue XorR
= DAG
.getNode(ISD::XOR
, SDLoc(N1
), OpVT
, RL
, RR
);
4271 SDValue Or
= DAG
.getNode(ISD::OR
, DL
, OpVT
, XorL
, XorR
);
4272 SDValue Zero
= DAG
.getConstant(0, DL
, OpVT
);
4273 return DAG
.getSetCC(DL
, VT
, Or
, Zero
, CC1
);
4276 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4277 if ((IsAnd
&& CC1
== ISD::SETNE
) || (!IsAnd
&& CC1
== ISD::SETEQ
)) {
4278 // Match a shared variable operand and 2 non-opaque constant operands.
4279 ConstantSDNode
*C0
= isConstOrConstSplat(LR
);
4280 ConstantSDNode
*C1
= isConstOrConstSplat(RR
);
4281 if (LL
== RL
&& C0
&& C1
&& !C0
->isOpaque() && !C1
->isOpaque()) {
4282 // Canonicalize larger constant as C0.
4283 if (C1
->getAPIntValue().ugt(C0
->getAPIntValue()))
4286 // The difference of the constants must be a single bit.
4287 const APInt
&C0Val
= C0
->getAPIntValue();
4288 const APInt
&C1Val
= C1
->getAPIntValue();
4289 if ((C0Val
- C1Val
).isPowerOf2()) {
4290 // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4291 // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4292 SDValue OffsetC
= DAG
.getConstant(-C1Val
, DL
, OpVT
);
4293 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, OpVT
, LL
, OffsetC
);
4294 SDValue MaskC
= DAG
.getConstant(~(C0Val
- C1Val
), DL
, OpVT
);
4295 SDValue And
= DAG
.getNode(ISD::AND
, DL
, OpVT
, Add
, MaskC
);
4296 SDValue Zero
= DAG
.getConstant(0, DL
, OpVT
);
4297 return DAG
.getSetCC(DL
, VT
, And
, Zero
, CC0
);
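        // Worked example (editor addition, illustrative): with C0 = 5 and
        // C1 = 4 this turns (and (setne X, 5), (setne X, 4)) into
        // setne ((add X, -4) & ~1), 0 -- one compare instead of two, because
        // 5 - 4 is a single bit.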
4303 // Canonicalize equivalent operands to LL == RL.
4304 if (LL
== RR
&& LR
== RL
) {
4305 CC1
= ISD::getSetCCSwappedOperands(CC1
);
4309 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4310 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4311 if (LL
== RL
&& LR
== RR
) {
4312 ISD::CondCode NewCC
= IsAnd
? ISD::getSetCCAndOperation(CC0
, CC1
, IsInteger
)
4313 : ISD::getSetCCOrOperation(CC0
, CC1
, IsInteger
);
4314 if (NewCC
!= ISD::SETCC_INVALID
&&
4315 (!LegalOperations
||
4316 (TLI
.isCondCodeLegal(NewCC
, LL
.getSimpleValueType()) &&
4317 TLI
.isOperationLegal(ISD::SETCC
, OpVT
))))
4318 return DAG
.getSetCC(DL
, VT
, LL
, LR
, NewCC
);
4324 /// This contains all DAGCombine rules which reduce two values combined by
4325 /// an And operation to a single value. This makes them reusable in the context
4326 /// of visitSELECT(). Rules involving constants are not included as
4327 /// visitSELECT() already handles those cases.
4328 SDValue
DAGCombiner::visitANDLike(SDValue N0
, SDValue N1
, SDNode
*N
) {
4329 EVT VT
= N1
.getValueType();
4332 // fold (and x, undef) -> 0
4333 if (N0
.isUndef() || N1
.isUndef())
4334 return DAG
.getConstant(0, DL
, VT
);
4336 if (SDValue V
= foldLogicOfSetCCs(true, N0
, N1
, DL
))
4339 if (N0
.getOpcode() == ISD::ADD
&& N1
.getOpcode() == ISD::SRL
&&
4340 VT
.getSizeInBits() <= 64) {
4341 if (ConstantSDNode
*ADDI
= dyn_cast
<ConstantSDNode
>(N0
.getOperand(1))) {
4342 if (ConstantSDNode
*SRLI
= dyn_cast
<ConstantSDNode
>(N1
.getOperand(1))) {
4343 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4344 // immediate for an add, but it is legal if its top c2 bits are set,
4345 // transform the ADD so the immediate doesn't need to be materialized
4347 APInt ADDC
= ADDI
->getAPIntValue();
4348 APInt SRLC
= SRLI
->getAPIntValue();
4349 if (ADDC
.getMinSignedBits() <= 64 &&
4350 SRLC
.ult(VT
.getSizeInBits()) &&
4351 !TLI
.isLegalAddImmediate(ADDC
.getSExtValue())) {
4352 APInt Mask
= APInt::getHighBitsSet(VT
.getSizeInBits(),
4353 SRLC
.getZExtValue());
4354 if (DAG
.MaskedValueIsZero(N0
.getOperand(1), Mask
)) {
4356 if (TLI
.isLegalAddImmediate(ADDC
.getSExtValue())) {
4359 DAG
.getNode(ISD::ADD
, DL0
, VT
,
4360 N0
.getOperand(0), DAG
.getConstant(ADDC
, DL
, VT
));
4361 CombineTo(N0
.getNode(), NewAdd
);
4362 // Return N so it doesn't get rechecked!
4363 return SDValue(N
, 0);
4371 // Reduce bit extract of low half of an integer to the narrower type.
4372 // (and (srl i64:x, K), KMask) ->
4373 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4374 if (N0
.getOpcode() == ISD::SRL
&& N0
.hasOneUse()) {
4375 if (ConstantSDNode
*CAnd
= dyn_cast
<ConstantSDNode
>(N1
)) {
4376 if (ConstantSDNode
*CShift
= dyn_cast
<ConstantSDNode
>(N0
.getOperand(1))) {
4377 unsigned Size
= VT
.getSizeInBits();
4378 const APInt
&AndMask
= CAnd
->getAPIntValue();
4379 unsigned ShiftBits
= CShift
->getZExtValue();
4381 // Bail out, this node will probably disappear anyway.
4385 unsigned MaskBits
= AndMask
.countTrailingOnes();
4386 EVT HalfVT
= EVT::getIntegerVT(*DAG
.getContext(), Size
/ 2);
4388 if (AndMask
.isMask() &&
4389 // Required bits must not span the two halves of the integer and
4390 // must fit in the half size type.
4391 (ShiftBits
+ MaskBits
<= Size
/ 2) &&
4392 TLI
.isNarrowingProfitable(VT
, HalfVT
) &&
4393 TLI
.isTypeDesirableForOp(ISD::AND
, HalfVT
) &&
4394 TLI
.isTypeDesirableForOp(ISD::SRL
, HalfVT
) &&
4395 TLI
.isTruncateFree(VT
, HalfVT
) &&
4396 TLI
.isZExtFree(HalfVT
, VT
)) {
4397 // The isNarrowingProfitable is to avoid regressions on PPC and
4398 // AArch64 which match a few 64-bit bit insert / bit extract patterns
4399 // on downstream users of this. Those patterns could probably be
4400 // extended to handle extensions mixed in.
4403 assert(MaskBits
<= Size
);
4405 // Extracting the highest bit of the low half.
4406 EVT ShiftVT
= TLI
.getShiftAmountTy(HalfVT
, DAG
.getDataLayout());
4407 SDValue Trunc
= DAG
.getNode(ISD::TRUNCATE
, SL
, HalfVT
,
4410 SDValue NewMask
= DAG
.getConstant(AndMask
.trunc(Size
/ 2), SL
, HalfVT
);
4411 SDValue ShiftK
= DAG
.getConstant(ShiftBits
, SL
, ShiftVT
);
4412 SDValue Shift
= DAG
.getNode(ISD::SRL
, SL
, HalfVT
, Trunc
, ShiftK
);
4413 SDValue And
= DAG
.getNode(ISD::AND
, SL
, HalfVT
, Shift
, NewMask
);
4414 return DAG
.getNode(ISD::ZERO_EXTEND
, SL
, VT
, And
);
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                                   EVT LoadResultTy, EVT &ExtVT) {
  if (!AndC->getAPIntValue().isMask())
    return false;

  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();

  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  EVT LoadedVT = LoadN->getMemoryVT();

  if (ExtVT == LoadedVT &&
      (!LegalOperations ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
    // ZEXTLOAD will match without needing to change the size of the value
    // being loaded.
    return true;
  }

  // Do not change the width of a volatile load.
  if (LoadN->isVolatile())
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    return false;

  if (LegalOperations &&
      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    return false;

  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
    return false;

  return true;
}
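// Illustrative example (editor addition): this check is what lets a pattern
// like "and (load i32 %p), 255" later become a zextload i8, provided the
// target reports that extending load as legal and worth the narrowing.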
4460 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode
*LDST
,
4461 ISD::LoadExtType ExtType
, EVT
&MemVT
,
4465 // Only allow byte offsets.
4469 // Do not generate loads of non-round integer types since these can
4470 // be expensive (and would be wrong if the type is not byte sized).
4471 if (!MemVT
.isRound())
4474 // Don't change the width of a volatile load.
4475 if (LDST
->isVolatile())
4478 // Verify that we are actually reducing a load width here.
4479 if (LDST
->getMemoryVT().getSizeInBits() < MemVT
.getSizeInBits())
4482 // Ensure that this isn't going to produce an unsupported unaligned access.
4484 !TLI
.allowsMemoryAccess(*DAG
.getContext(), DAG
.getDataLayout(), MemVT
,
4485 LDST
->getAddressSpace(), ShAmt
/ 8))
4488 // It's not possible to generate a constant of extended or untyped type.
4489 EVT PtrType
= LDST
->getBasePtr().getValueType();
4490 if (PtrType
== MVT::Untyped
|| PtrType
.isExtended())
4493 if (isa
<LoadSDNode
>(LDST
)) {
4494 LoadSDNode
*Load
= cast
<LoadSDNode
>(LDST
);
4495 // Don't transform one with multiple uses, this would require adding a new
4497 if (!SDValue(Load
, 0).hasOneUse())
4500 if (LegalOperations
&&
4501 !TLI
.isLoadExtLegal(ExtType
, Load
->getValueType(0), MemVT
))
4504 // For the transform to be legal, the load must produce only two values
4505 // (the value loaded and the chain). Don't transform a pre-increment
4506 // load, for example, which produces an extra value. Otherwise the
4507 // transformation is not equivalent, and the downstream logic to replace
4508 // uses gets things wrong.
4509 if (Load
->getNumValues() > 2)
4512 // If the load that we're shrinking is an extload and we're not just
4513 // discarding the extension we can't simply shrink the load. Bail.
4514 // TODO: It would be possible to merge the extensions in some cases.
4515 if (Load
->getExtensionType() != ISD::NON_EXTLOAD
&&
4516 Load
->getMemoryVT().getSizeInBits() < MemVT
.getSizeInBits() + ShAmt
)
4519 if (!TLI
.shouldReduceLoadWidth(Load
, ExtType
, MemVT
))
4522 assert(isa
<StoreSDNode
>(LDST
) && "It is not a Load nor a Store SDNode");
4523 StoreSDNode
*Store
= cast
<StoreSDNode
>(LDST
);
4524 // Can't write outside the original store
4525 if (Store
->getMemoryVT().getSizeInBits() < MemVT
.getSizeInBits() + ShAmt
)
4528 if (LegalOperations
&&
4529 !TLI
.isTruncStoreLegal(Store
->getValue().getValueType(), MemVT
))
4535 bool DAGCombiner::SearchForAndLoads(SDNode
*N
,
4536 SmallVectorImpl
<LoadSDNode
*> &Loads
,
4537 SmallPtrSetImpl
<SDNode
*> &NodesWithConsts
,
4538 ConstantSDNode
*Mask
,
4539 SDNode
*&NodeToMask
) {
4540 // Recursively search for the operands, looking for loads which can be
4542 for (unsigned i
= 0, e
= N
->getNumOperands(); i
< e
; ++i
) {
4543 SDValue Op
= N
->getOperand(i
);
4545 if (Op
.getValueType().isVector())
4548 // Some constants may need fixing up later if they are too large.
4549 if (auto *C
= dyn_cast
<ConstantSDNode
>(Op
)) {
4550 if ((N
->getOpcode() == ISD::OR
|| N
->getOpcode() == ISD::XOR
) &&
4551 (Mask
->getAPIntValue() & C
->getAPIntValue()) != C
->getAPIntValue())
4552 NodesWithConsts
.insert(N
);
4556 if (!Op
.hasOneUse())
4559 switch(Op
.getOpcode()) {
4561 auto *Load
= cast
<LoadSDNode
>(Op
);
4563 if (isAndLoadExtLoad(Mask
, Load
, Load
->getValueType(0), ExtVT
) &&
4564 isLegalNarrowLdSt(Load
, ISD::ZEXTLOAD
, ExtVT
)) {
4566 // ZEXTLOAD is already small enough.
4567 if (Load
->getExtensionType() == ISD::ZEXTLOAD
&&
4568 ExtVT
.bitsGE(Load
->getMemoryVT()))
4571 // Use LE to convert equal sized loads to zext.
4572 if (ExtVT
.bitsLE(Load
->getMemoryVT()))
4573 Loads
.push_back(Load
);
4579 case ISD::ZERO_EXTEND
:
4580 case ISD::AssertZext
: {
4581 unsigned ActiveBits
= Mask
->getAPIntValue().countTrailingOnes();
4582 EVT ExtVT
= EVT::getIntegerVT(*DAG
.getContext(), ActiveBits
);
4583 EVT VT
= Op
.getOpcode() == ISD::AssertZext
?
4584 cast
<VTSDNode
>(Op
.getOperand(1))->getVT() :
4585 Op
.getOperand(0).getValueType();
4587 // We can accept extending nodes if the mask is wider or an equal
4588 // width to the original type.
4589 if (ExtVT
.bitsGE(VT
))
4596 if (!SearchForAndLoads(Op
.getNode(), Loads
, NodesWithConsts
, Mask
,
4602 // Allow one node which will masked along with any loads found.
4606 // Also ensure that the node to be masked only produces one data result.
4607 NodeToMask
= Op
.getNode();
4608 if (NodeToMask
->getNumValues() > 1) {
4609 bool HasValue
= false;
4610 for (unsigned i
= 0, e
= NodeToMask
->getNumValues(); i
< e
; ++i
) {
4611 MVT VT
= SDValue(NodeToMask
, i
).getSimpleValueType();
4612 if (VT
!= MVT::Glue
&& VT
!= MVT::Other
) {
4614 NodeToMask
= nullptr;
4620 assert(HasValue
&& "Node to be masked has no data result?");
4626 bool DAGCombiner::BackwardsPropagateMask(SDNode
*N
, SelectionDAG
&DAG
) {
4627 auto *Mask
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
4631 if (!Mask
->getAPIntValue().isMask())
4634 // No need to do anything if the and directly uses a load.
4635 if (isa
<LoadSDNode
>(N
->getOperand(0)))
4638 SmallVector
<LoadSDNode
*, 8> Loads
;
4639 SmallPtrSet
<SDNode
*, 2> NodesWithConsts
;
4640 SDNode
*FixupNode
= nullptr;
4641 if (SearchForAndLoads(N
, Loads
, NodesWithConsts
, Mask
, FixupNode
)) {
4642 if (Loads
.size() == 0)
4645 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N
->dump());
4646 SDValue MaskOp
= N
->getOperand(1);
4648 // If it exists, fixup the single node we allow in the tree that needs
4651 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode
->dump());
4652 SDValue And
= DAG
.getNode(ISD::AND
, SDLoc(FixupNode
),
4653 FixupNode
->getValueType(0),
4654 SDValue(FixupNode
, 0), MaskOp
);
4655 DAG
.ReplaceAllUsesOfValueWith(SDValue(FixupNode
, 0), And
);
4656 if (And
.getOpcode() == ISD ::AND
)
4657 DAG
.UpdateNodeOperands(And
.getNode(), SDValue(FixupNode
, 0), MaskOp
);
4660 // Narrow any constants that need it.
4661 for (auto *LogicN
: NodesWithConsts
) {
4662 SDValue Op0
= LogicN
->getOperand(0);
4663 SDValue Op1
= LogicN
->getOperand(1);
4665 if (isa
<ConstantSDNode
>(Op0
))
4666 std::swap(Op0
, Op1
);
4668 SDValue And
= DAG
.getNode(ISD::AND
, SDLoc(Op1
), Op1
.getValueType(),
4671 DAG
.UpdateNodeOperands(LogicN
, Op0
, And
);
4674 // Create narrow loads.
4675 for (auto *Load
: Loads
) {
4676 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load
->dump());
4677 SDValue And
= DAG
.getNode(ISD::AND
, SDLoc(Load
), Load
->getValueType(0),
4678 SDValue(Load
, 0), MaskOp
);
4679 DAG
.ReplaceAllUsesOfValueWith(SDValue(Load
, 0), And
);
4680 if (And
.getOpcode() == ISD ::AND
)
4682 DAG
.UpdateNodeOperands(And
.getNode(), SDValue(Load
, 0), MaskOp
), 0);
4683 SDValue NewLoad
= ReduceLoadWidth(And
.getNode());
4685 "Shouldn't be masking the load if it can't be narrowed");
4686 CombineTo(Load
, NewLoad
, NewLoad
.getValue(1));
4688 DAG
.ReplaceAllUsesWith(N
, N
->getOperand(0).getNode());
// Fold
//   x & (-1 'logical shift' y)
// into
//   (x 'opposite logical shift' y) 'logical shift' y
// if it is better for performance.
SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
  assert(N->getOpcode() == ISD::AND);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Do we actually prefer shifts over mask?
  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
    return SDValue();

  // Try to match  (-1 '[outer] logical shift' y)
  unsigned OuterShift;
  unsigned InnerShift; // The opposite direction to the OuterShift.
  SDValue Y;           // Shift amount.
  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
    if (!M.hasOneUse())
      return false;
    OuterShift = M->getOpcode();
    if (OuterShift == ISD::SHL)
      InnerShift = ISD::SRL;
    else if (OuterShift == ISD::SRL)
      InnerShift = ISD::SHL;
    else
      return false;
    if (!isAllOnesConstant(M->getOperand(0)))
      return false;
    Y = M->getOperand(1);
    return true;
  };

  SDValue X;
  if (matchMask(N1))
    X = N0;
  else if (matchMask(N0))
    X = N1;
  else
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  //     tmp = x 'opposite logical shift' y
  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
  //     ret = tmp 'logical shift' y
  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);

  return T1;
}
4748 SDValue
DAGCombiner::visitAND(SDNode
*N
) {
4749 SDValue N0
= N
->getOperand(0);
4750 SDValue N1
= N
->getOperand(1);
4751 EVT VT
= N1
.getValueType();
4758 if (VT
.isVector()) {
4759 if (SDValue FoldedVOp
= SimplifyVBinOp(N
))
4762 // fold (and x, 0) -> 0, vector edition
4763 if (ISD::isBuildVectorAllZeros(N0
.getNode()))
4764 // do not return N0, because undef node may exist in N0
4765 return DAG
.getConstant(APInt::getNullValue(N0
.getScalarValueSizeInBits()),
4766 SDLoc(N
), N0
.getValueType());
4767 if (ISD::isBuildVectorAllZeros(N1
.getNode()))
4768 // do not return N1, because undef node may exist in N1
4769 return DAG
.getConstant(APInt::getNullValue(N1
.getScalarValueSizeInBits()),
4770 SDLoc(N
), N1
.getValueType());
4772 // fold (and x, -1) -> x, vector edition
4773 if (ISD::isBuildVectorAllOnes(N0
.getNode()))
4775 if (ISD::isBuildVectorAllOnes(N1
.getNode()))
4779 // fold (and c1, c2) -> c1&c2
4780 ConstantSDNode
*N0C
= getAsNonOpaqueConstant(N0
);
4781 ConstantSDNode
*N1C
= isConstOrConstSplat(N1
);
4782 if (N0C
&& N1C
&& !N1C
->isOpaque())
4783 return DAG
.FoldConstantArithmetic(ISD::AND
, SDLoc(N
), VT
, N0C
, N1C
);
4784 // canonicalize constant to RHS
4785 if (DAG
.isConstantIntBuildVectorOrConstantInt(N0
) &&
4786 !DAG
.isConstantIntBuildVectorOrConstantInt(N1
))
4787 return DAG
.getNode(ISD::AND
, SDLoc(N
), VT
, N1
, N0
);
4788 // fold (and x, -1) -> x
4789 if (isAllOnesConstant(N1
))
4791 // if (and x, c) is known to be zero, return 0
4792 unsigned BitWidth
= VT
.getScalarSizeInBits();
4793 if (N1C
&& DAG
.MaskedValueIsZero(SDValue(N
, 0),
4794 APInt::getAllOnesValue(BitWidth
)))
4795 return DAG
.getConstant(0, SDLoc(N
), VT
);
4797 if (SDValue NewSel
= foldBinOpIntoSelect(N
))
4801 if (SDValue RAND
= reassociateOps(ISD::AND
, SDLoc(N
), N0
, N1
, N
->getFlags()))
4804 // Try to convert a constant mask AND into a shuffle clear mask.
4806 if (SDValue Shuffle
= XformToShuffleWithZero(N
))
4809 // fold (and (or x, C), D) -> D if (C & D) == D
4810 auto MatchSubset
= [](ConstantSDNode
*LHS
, ConstantSDNode
*RHS
) {
4811 return RHS
->getAPIntValue().isSubsetOf(LHS
->getAPIntValue());
4813 if (N0
.getOpcode() == ISD::OR
&&
4814 ISD::matchBinaryPredicate(N0
.getOperand(1), N1
, MatchSubset
))
4816 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4817 if (N1C
&& N0
.getOpcode() == ISD::ANY_EXTEND
) {
4818 SDValue N0Op0
= N0
.getOperand(0);
4819 APInt Mask
= ~N1C
->getAPIntValue();
4820 Mask
= Mask
.trunc(N0Op0
.getScalarValueSizeInBits());
4821 if (DAG
.MaskedValueIsZero(N0Op0
, Mask
)) {
4822 SDValue Zext
= DAG
.getNode(ISD::ZERO_EXTEND
, SDLoc(N
),
4823 N0
.getValueType(), N0Op0
);
4825 // Replace uses of the AND with uses of the Zero extend node.
4828 // We actually want to replace all uses of the any_extend with the
4829 // zero_extend, to avoid duplicating things. This will later cause this
4830 // AND to be folded.
4831 CombineTo(N0
.getNode(), Zext
);
4832 return SDValue(N
, 0); // Return N so it doesn't get rechecked!

  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize / BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i * BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
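
  // For example, (and (extload i8, ext to i32), 0xFF): the mask truncated to
  // the memory type is all ones, so rewriting the load as a zero-extending
  // load makes the AND redundant and it is removed.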

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                              BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                              BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  SDValue N0 = N.getOperand(0);
  unsigned Opc0 = N0.getOpcode();
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    return false;

  ConstantSDNode *N1C = nullptr;
  // SHL or SRL: look upstream for AND mask operand
  if (Opc == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  else if (Opc0 == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!N1C)
    return false;

  unsigned MaskByteOffset;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       MaskByteOffset = 0; break;
  case 0xFF00:     MaskByteOffset = 1; break;
  case 0xFFFF:
    // In case demanded bits didn't clear the bits that will be shifted out.
    // This is needed for X86.
    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
      MaskByteOffset = 1;
      break;
    }
    return false;
  case 0xFF0000:   MaskByteOffset = 2; break;
  case 0xFF000000: MaskByteOffset = 3; break;
  }
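
  // For an i32 value the mask constant identifies which byte of the source
  // this element extracts: 0xFF -> byte 0, 0xFF00 -> byte 1,
  // 0xFF0000 -> byte 2, 0xFF000000 -> byte 3.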

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  if (Opc == ISD::AND) {
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (Opc0 != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (Opc0 != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  if (Parts[MaskByteOffset])
    return false;

  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
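/// For example, x = 0xAABBCCDD produces 0xBBAADDCC, which equals
/// rotl(bswap(x), 16).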
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (or x, undef) -> -1
  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    return DAG.getAllOnesConstant(DL, VT);

  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    return V;

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero. Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
  }

  return SDValue();
}

/// OR combines for which the commuted variant will be tried as well.
static SDValue visitORCommutative(
    SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == ISD::AND) {
    // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
    if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);

    // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
    if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 or c1/c2 are undef.
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
    return Combined;
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
    return Combined;

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;
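
  // A typical rotate idiom handled below: on i32, (or (shl x, 3), (srl x, 29))
  // becomes (rotl x, 3) when the target supports it.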

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If OR can be rewritten into ADD, try combines based on ADD.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  return SDValue();
}

static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
  if (Op.getOpcode() == ISD::AND &&
      DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
    Mask = Op.getOperand(1);
    return Op.getOperand(0);
  }
  return Op;
}

/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
                            SDValue &Mask) {
  Op = stripConstantMask(DAG, Op, Mask);
  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }
  return false;
}

/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv. This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these values
  // Check that we have constant values.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt, Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                         ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos. Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time. E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
      unsigned Bits = Log2_64(EltSize);
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'. The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //     NegC & Mask == (EltSize - PosC) & Mask
  //     EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, const SDLoc &DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ? Pos : Neg).getNode();
  }

  return nullptr;
}

// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type. Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return nullptr;

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract. We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return nullptr;

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}

namespace {

/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
  // For constant zero providers Load is set to nullptr. For memory providers
  // Load represents the node which loads the byte from memory.
  // ByteOffset is the offset of the byte in the value produced by the load.
  LoadSDNode *Load = nullptr;
  unsigned ByteOffset = 0;

  ByteProvider() = default;

  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    return ByteProvider(Load, ByteOffset);
  }

  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }

  bool isConstantZero() const { return !Load; }
  bool isMemory() const { return Load; }

  bool operator==(const ByteProvider &Other) const {
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
  }

private:
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
      : Load(Load), ByteOffset(ByteOffset) {}
};

} // end anonymous namespace

/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
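///
/// For example, in (or (zero_extend (load i8)), (shl (zero_extend (load i8)), 8))
/// byte 0 of the result is provided by the first load and byte 1 by the second.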
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");

  switch (Op.getOpcode()) {
  case ISD::OR: {
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}

static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
  return i;
}

static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  return BW - i - 1;
}

// Check if the bytes offsets we are looking at match with either big or
// little endian value loaded. Return true for big endian, false for little
// endian, and None if match failed.
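// For example, for a 4-byte value, relative offsets {0, 1, 2, 3} indicate a
// little-endian layout and {3, 2, 1, 0} a big-endian layout.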
static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
                                  int64_t FirstOffset) {
  // The endian can be decided only when it is 2 bytes at least.
  unsigned Width = ByteOffsets.size();
  if (Width < 2)
    return None;

  bool BigEndian = true, LittleEndian = true;
  for (unsigned i = 0; i < Width; i++) {
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
    BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
    if (!BigEndian && !LittleEndian)
      return None;
  }

  assert((BigEndian != LittleEndian) && "It should be either big endian or"
                                        "little endian");
  return BigEndian;
}

/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
///
/// Assuming little endian target:
///  i8 *a = ...;
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...;
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///         t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    return SDValue();

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };
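
  // MemoryByteOffset maps a provider's offset within the loaded value to the
  // offset of that byte in memory; e.g. byte 0 of an i16 load sits at memory
  // offset 0 on a little-endian target and at offset 1 on a big-endian one.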

  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
  for (unsigned i = 0; i < ByteWidth; i++) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P || !P->isMemory()) // All the bytes must be loaded from memory
      return SDValue();

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single load and bswap if needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, FirstLoad->getAddressSpace(),
                                        FirstLoad->getAlignment(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}

// If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold masked merge, with canonical pattern of:
//   ((x ^ y) & m) ^ y
// into:
//   (x & m) | (y & ~m)
// If y is a constant, and the 'andn' does not work with immediates,
// we unfold into a different pattern:
//   ~(~x & m) & (m | y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
//       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG
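//
// For example, with i32 values x, y and mask m, (xor (and (xor x, y), m), y)
// becomes (or (and x, m), (and y, (xor m, -1))), letting the target use its
// and-not instruction for the second AND.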
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Don't touch 'not' (i.e. where y = -1).
  if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // There are 3 commutable operators in the pattern,
  // so we have to deal with 8 possible variants of the basic pattern.
  SDValue X, Y, M;
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue Xor0 = Xor.getOperand(0);
    SDValue Xor1 = Xor.getOperand(1);
    // Don't touch 'not' (i.e. where y = -1).
    if (isAllOnesOrAllOnesSplat(Xor1))
      return false;
    if (Other == Xor0)
      std::swap(Xor0, Xor1);
    if (Other != Xor1)
      return false;
    X = Xor0;
    Y = Xor1;
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    return SDValue();

  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it, too.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // We can transform if the target has AndNot
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // If Y is a constant, check that 'andn' works with immediates.
  if (!TLI.hasAndNot(Y)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
  }

  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);

  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}

SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  SDLoc DL(N);
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType().isInteger());
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
    }
  }

  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
    ConstantSDNode *XorC = isConstOrConstSplat(N1);
    ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
    unsigned BitWidth = VT.getScalarSizeInBits();
    if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We can not guarantee that a bogus
      // shift has been simplified to undef.
      uint64_t ShiftAmt = ShiftC->getLimitedValue();
      if (ShiftAmt < BitWidth) {
        APInt Ones = APInt::getAllOnesValue(BitWidth);
        Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
        if (XorC->getAPIntValue() == Ones) {
          // If the xor constant is a shifted -1, do a 'not' before the shift:
          // xor (X << ShiftC), XorC --> (not X) << ShiftC
          // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
          SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
          return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
        }
      }
    }
  }
6524 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
6525 if (TLI
.isOperationLegalOrCustom(ISD::ABS
, VT
)) {
6526 SDValue A
= N0Opcode
== ISD::ADD
? N0
: N1
;
6527 SDValue S
= N0Opcode
== ISD::SRA
? N0
: N1
;
6528 if (A
.getOpcode() == ISD::ADD
&& S
.getOpcode() == ISD::SRA
) {
6529 SDValue A0
= A
.getOperand(0), A1
= A
.getOperand(1);
6530 SDValue S0
= S
.getOperand(0);
6531 if ((A0
== S
&& A1
== S0
) || (A1
== S
&& A0
== S0
)) {
6532 unsigned OpSizeInBits
= VT
.getScalarSizeInBits();
6533 if (ConstantSDNode
*C
= isConstOrConstSplat(S
.getOperand(1)))
6534 if (C
->getAPIntValue() == (OpSizeInBits
- 1))
6535 return DAG
.getNode(ISD::ABS
, DL
, VT
, S0
);
6540 // fold (xor x, x) -> 0
6542 return tryFoldToZero(DL
, TLI
, VT
, DAG
, LegalOperations
);
6544 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6545 // Here is a concrete example of this equivalence:
6547 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
6548 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6552 // i16 ~1 == 0b1111111111111110
6553 // i16 rol(~1, 14) == 0b1011111111111111
6555 // Some additional tips to help conceptualize this transform:
6556 // - Try to see the operation as placing a single zero in a value of all ones.
6557 // - There exists no value for x which would allow the result to contain zero.
6558 // - Values of x larger than the bitwidth are undefined and do not require a
6559 // consistent result.
6560 // - Pushing the zero left requires shifting one bits in from the right.
6561 // A rotate left of ~1 is a nice way of achieving the desired result.
6562 if (TLI
.isOperationLegalOrCustom(ISD::ROTL
, VT
) && N0Opcode
== ISD::SHL
&&
6563 isAllOnesConstant(N1
) && isOneConstant(N0
.getOperand(0))) {
6564 return DAG
.getNode(ISD::ROTL
, DL
, VT
, DAG
.getConstant(~1, DL
, VT
),
6568 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
6569 if (N0Opcode
== N1
.getOpcode())
6570 if (SDValue V
= hoistLogicOpWithSameOpcodeHands(N
))
6573 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
6574 if (SDValue MM
= unfoldMaskedMerge(N
))
6577 // Simplify the expression using non-local knowledge.
6578 if (SimplifyDemandedBits(SDValue(N
, 0)))
6579 return SDValue(N
, 0);
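// Illustrative note (editorial example, not upstream text) for the
// masked-merge unfold in visitXOR: with x = 0b1100, y = 0b1010, m = 0b0110,
//   ((x ^ y) & m) ^ y  == (0b0110 & 0b0110) ^ 0b1010 == 0b1100
//   (x & m) | (y & ~m) ==  0b0100 | 0b1000            == 0b1100
// so both forms select the bits of x where m is set and the bits of y elsewhere.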
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // Do not turn a 'not' into a regular xor.
  if (isBitwiseNot(N->getOperand(0)))
    return SDValue();

  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases when we figure out that it is
  // exactly profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(N, Level))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), VT,
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
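// Illustrative note (editorial example, not upstream text): the commute above
// turns, e.g., an i32 address computation
//   (shl (add x, 5), 2) --> (add (shl x, 2), 20)
// so the binop constant is pre-shifted and the shift is applied to x directly.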
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  EVT TruncVT = N->getValueType(0);
  if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
      TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
    SDValue N01 = N->getOperand(0).getOperand(1);
    if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
      SDLoc DL(N);
      SDValue N00 = N->getOperand(0).getOperand(0);
      SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
      SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
      AddToWorklist(Trunc00.getNode());
      AddToWorklist(Trunc01.getNode());
      return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
    }
  }

  return SDValue();
}
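// Illustrative note (editorial example, not upstream text):
//   (truncate:i32 (and:i64 x, 255)) --> (and:i32 (truncate:i32 x), 255)
// Truncation commutes with AND, so the narrower AND can feed shift/rotate
// amounts directly.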
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> x iff (c % BitSize) == 0
  if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
    APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
    if (DAG.MaskedValueIsZero(N1, ModuloMask))
      return N0;
  }

  // fold (rot x, c) -> (rot x, c % BitSize)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
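// Illustrative note (editorial example, not upstream text) for the
// rotate-of-rotate fold, for i8:
//   (rotl (rotl x, 6), 7) --> (rotl x, (6 + 7) srem 8) == (rotl x, 5)
// A rotl of a rotr combines with ISD::SUB instead of ISD::ADD.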
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0.getOperand(0),
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  // TODO - drop hasOneUse requirement if c1 == c2?
  // TODO - support non-uniform vector shift amounts.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
        uint64_t c1 = N0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDLoc DL(N);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
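// Illustrative note (editorial example, not upstream text) for the
// shl-of-shl fold in visitSHL, for i32:
//   (shl (shl x, 3), 4)  --> (shl x, 7)    // 3 + 4 < 32
//   (shl (shl x, 30), 4) --> 0             // 30 + 4 >= 32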
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> (sra c1, c2)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target, sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
            DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
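// Illustrative note (editorial example, not upstream text) for the
// sra-of-shl fold in visitSRA, for i32:
//   (sra (shl x, 24), 24) --> (sign_extend_inreg x, i8)
// when SIGN_EXTEND_INREG of i8 is legal (or we are before legalization).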
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // TODO - (srl (shl x, c1), c2).
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                                       DAG.getConstant(ShiftAmt, DL0,
                                           getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                         DAG.getConstant(ShAmt, DL,
                                         getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However, after the source operand of the SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
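// Illustrative note (editorial example, not upstream text) for the
// srl-of-any_extend fold in visitSRL:
//   (srl (any_extend:i32 x:i16), 20) --> undef
// because a shift amount >= 16 shifts in only the extension's undefined bits.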
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
  if (isPowerOf2_32(BitWidth))
    if (DAG.MaskedValueIsZero(
            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
      return IsFSHL ? N0 : N1;

  auto IsUndefOrZero = [](SDValue V) {
    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
  };

  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    EVT ShAmtTy = N2.getValueType();

    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
    }

    unsigned ShAmt = Cst->getZExtValue();
    if (ShAmt == 0)
      return IsFSHL ? N0 : N1;

    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
    if (IsUndefOrZero(N0))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
                                         SDLoc(N), ShAmtTy));
    if (IsUndefOrZero(N1))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
                                         SDLoc(N), ShAmtTy));
  }

  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
  // TODO: when is it worth doing SUB(BW, N2) as well?
  if (isPowerOf2_32(BitWidth)) {
    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
  }

  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal. If funnel shift
  // is legal as well we might be better off avoiding non-constant (BW - N2).
  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
  if (N0 == N1 && hasOperation(RotOpc, VT))
    return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);

  // Simplify, based on bits shifted out of N0/N1.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
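// Illustrative note (editorial example, not upstream text) on funnel-shift
// semantics used above, for i8 and a nonzero amount c:
//   fshl(a, b, 3) == (a << 3) | (b >>u 5)   // top 8 bits of (a:b) << 3
//   fshl(x, x, c) == rotl(x, c), and fshr(x, x, c) == rotr(x, c)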
SDValue DAGCombiner::visitABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (abs c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
  // fold (abs (abs x)) -> (abs x)
  if (N0.getOpcode() == ISD::ABS)
    return N0;
  // fold (abs x) -> x iff not-negative
  if (DAG.SignBitIsZero(N0))
    return N0;
  return SDValue();
}

SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bswap c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0->getOperand(0);
  return SDValue();
}

SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bitreverse c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
  // fold (bitreverse (bitreverse x)) -> x
  if (N0.getOpcode() == ISD::BITREVERSE)
    return N0.getOperand(0);
  return SDValue();
}

SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);

  // If the value is known never to be zero, switch to the undef version.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  }

  return SDValue();
}

SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);

  // If the value is known never to be zero, switch to the undef version.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  }

  return SDValue();
}

SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}
// FIXME: This should be checking for no signed zeros on individual operands, as
// well as no nans.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
                                         SDValue RHS,
                                         const TargetLowering &TLI) {
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = LHS.getValueType();

  return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
         TLI.isProfitableToCombineMinNumMaxNum(VT) &&
         DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}

/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // Since it's known never nan to get here already, either fminnum or
    // fminnum_ieee are OK. Try the ieee version first, since fminnum is
    // expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}
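// Illustrative note (editorial example, not upstream text): with
// no-signed-zeros FP math and both operands known never NaN,
//   select (setolt x, y), x, y --> fminnum_ieee(x, y)  (or fminnum)
//   select (setogt x, y), x, y --> fmaxnum_ieee(x, y)  (or fmaxnum)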
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
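// Illustrative note (editorial example, not upstream text) for the
// constants-differing-by-one case above:
//   select i1 %c, 7, 6 --> add (zext %c to i32), 6
// which evaluates to 7 when %c is true and 6 when it is false.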
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  // select (not Cond), N1, N2 -> select Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, TLI))
    return DAG.getSelect(DL, VT, F, N2, N1);

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
                         N0.getOperand(2));

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
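// Note (descriptive comment added here, not upstream text): SplitVSETCC below
// splits a vector SETCC into lo/hi halves; the masked load/store/gather/scatter
// combines that follow use it to pre-split a SETCC mask whose result type would
// otherwise be split by the type legalizer.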
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue Scale = MSC->getScale();
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                         MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                         Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
                                    DataLo.getValueType(), DL, OpsLo, MMO);

  // The order of the Scatter operation after split is well defined. The "Hi"
  // part comes after the "Lo". So these two operations should be chained one
  // after another.
  SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                              DL, OpsHi, MMO);
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
  SDValue Mask = MST->getMask();
  SDValue Data = MST->getValue();
  EVT VT = Data.getValueType();
  SDLoc DL(N);

  // If the MSTORE data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() == ISD::SETCC) {
    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue MaskLo, MaskHi, Lo, Hi;
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

    SDValue Chain = MST->getChain();
    SDValue Ptr = MST->getBasePtr();

    EVT MemoryVT = MST->getMemoryVT();
    unsigned Alignment = MST->getOriginalAlignment();

    // if Alignment is equal to the vector size,
    // take the half of it for the second part
    unsigned SecondHalfAlignment =
        (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;

    EVT LoMemVT, HiMemVT;
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

    SDValue DataLo, DataHi;
    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

    MachineMemOperand *MMO = DAG.getMachineFunction().
      getMachineMemOperand(MST->getPointerInfo(),
                           MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                           Alignment, MST->getAAInfo(), MST->getRanges());

    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
                            MST->isTruncatingStore(),
                            MST->isCompressingStore());

    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                     MST->isCompressingStore());
    unsigned HiOffset = LoMemVT.getStoreSize();

    MMO = DAG.getMachineFunction().getMachineMemOperand(
        MST->getPointerInfo().getWithOffset(HiOffset),
        MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
        MST->getAAInfo(), MST->getRanges());

    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
                            MST->isTruncatingStore(),
                            MST->isCompressingStore());

    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
  }
  return SDValue();
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).

  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  SDValue PassThru = MGT->getPassThru();
  SDValue PassThruLo, PassThruHi;
  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Chain = MGT->getChain();
  EVT MemoryVT = MGT->getMemoryVT();
  unsigned Alignment = MGT->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue Scale = MGT->getScale();
  SDValue BasePtr = MGT->getBasePtr();
  SDValue Index = MGT->getIndex();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                         MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                         Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
                           MMO);

  SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
                           MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Legalized the chain result - switch anything that used the old chain to
  // use the new one.
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);

  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

  SDValue RetOps[] = { GatherRes, Chain };
  return DAG.getMergeValues(RetOps, DL);
}

SDValue DAGCombiner::visitMLOAD(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
  SDValue Mask = MLD->getMask();
  SDLoc DL(N);

  // If the MLOAD result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue MaskLo, MaskHi, Lo, Hi;
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

    SDValue PassThru = MLD->getPassThru();
    SDValue PassThruLo, PassThruHi;
    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);

    EVT LoVT, HiVT;
    std::tie(LoVT, HiVT
) = DAG
.GetSplitDestVTs(MLD
->getValueType(0));
8160 SDValue Chain
= MLD
->getChain();
8161 SDValue Ptr
= MLD
->getBasePtr();
8162 EVT MemoryVT
= MLD
->getMemoryVT();
8163 unsigned Alignment
= MLD
->getOriginalAlignment();
8165 // if Alignment is equal to the vector size,
8166 // take the half of it for the second part
8167 unsigned SecondHalfAlignment
=
8168 (Alignment
== MLD
->getValueType(0).getSizeInBits()/8) ?
8169 Alignment
/2 : Alignment
;
8171 EVT LoMemVT
, HiMemVT
;
8172 std::tie(LoMemVT
, HiMemVT
) = DAG
.GetSplitDestVTs(MemoryVT
);
8174 MachineMemOperand
*MMO
= DAG
.getMachineFunction().
8175 getMachineMemOperand(MLD
->getPointerInfo(),
8176 MachineMemOperand::MOLoad
, LoMemVT
.getStoreSize(),
8177 Alignment
, MLD
->getAAInfo(), MLD
->getRanges());
8179 Lo
= DAG
.getMaskedLoad(LoVT
, DL
, Chain
, Ptr
, MaskLo
, PassThruLo
, LoMemVT
,
8180 MMO
, ISD::NON_EXTLOAD
, MLD
->isExpandingLoad());
8182 Ptr
= TLI
.IncrementMemoryAddress(Ptr
, MaskLo
, DL
, LoMemVT
, DAG
,
8183 MLD
->isExpandingLoad());
8184 unsigned HiOffset
= LoMemVT
.getStoreSize();
8186 MMO
= DAG
.getMachineFunction().getMachineMemOperand(
8187 MLD
->getPointerInfo().getWithOffset(HiOffset
),
8188 MachineMemOperand::MOLoad
, HiMemVT
.getStoreSize(), SecondHalfAlignment
,
8189 MLD
->getAAInfo(), MLD
->getRanges());
8191 Hi
= DAG
.getMaskedLoad(HiVT
, DL
, Chain
, Ptr
, MaskHi
, PassThruHi
, HiMemVT
,
8192 MMO
, ISD::NON_EXTLOAD
, MLD
->isExpandingLoad());
8194 AddToWorklist(Lo
.getNode());
8195 AddToWorklist(Hi
.getNode());
8197 // Build a factor node to remember that this load is independent of the
8199 Chain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
, Lo
.getValue(1),
8202 // Legalized the chain result - switch anything that used the old chain to
8204 DAG
.ReplaceAllUsesOfValueWith(SDValue(MLD
, 1), Chain
);
8206 SDValue LoadRes
= DAG
.getNode(ISD::CONCAT_VECTORS
, DL
, VT
, Lo
, Hi
);
8208 SDValue RetOps
[] = { LoadRes
, Chain
};
8209 return DAG
.getMergeValues(RetOps
, DL
);
8214 /// A vector select of 2 constant vectors can be simplified to math/logic to
8215 /// avoid a variable select instruction and possibly avoid constant loads.
8216 SDValue
DAGCombiner::foldVSelectOfConstants(SDNode
*N
) {
8217 SDValue Cond
= N
->getOperand(0);
8218 SDValue N1
= N
->getOperand(1);
8219 SDValue N2
= N
->getOperand(2);
8220 EVT VT
= N
->getValueType(0);
8221 if (!Cond
.hasOneUse() || Cond
.getScalarValueSizeInBits() != 1 ||
8222 !TLI
.convertSelectOfConstantsToMath(VT
) ||
8223 !ISD::isBuildVectorOfConstantSDNodes(N1
.getNode()) ||
8224 !ISD::isBuildVectorOfConstantSDNodes(N2
.getNode()))
8227 // Check if we can use the condition value to increment/decrement a single
8228 // constant value. This simplifies a select to an add and removes a constant
8229 // load/materialization from the general case.
8230 bool AllAddOne
= true;
8231 bool AllSubOne
= true;
8232 unsigned Elts
= VT
.getVectorNumElements();
8233 for (unsigned i
= 0; i
!= Elts
; ++i
) {
8234 SDValue N1Elt
= N1
.getOperand(i
);
8235 SDValue N2Elt
= N2
.getOperand(i
);
8236 if (N1Elt
.isUndef() || N2Elt
.isUndef())
8239 const APInt
&C1
= cast
<ConstantSDNode
>(N1Elt
)->getAPIntValue();
8240 const APInt
&C2
= cast
<ConstantSDNode
>(N2Elt
)->getAPIntValue();
8247 // Further simplifications for the extra-special cases where the constants are
8248 // all 0 or all -1 should be implemented as folds of these patterns.
8250 if (AllAddOne
|| AllSubOne
) {
8251 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8252 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8253 auto ExtendOpcode
= AllAddOne
? ISD::ZERO_EXTEND
: ISD::SIGN_EXTEND
;
8254 SDValue ExtendedCond
= DAG
.getNode(ExtendOpcode
, DL
, VT
, Cond
);
8255 return DAG
.getNode(ISD::ADD
, DL
, VT
, ExtendedCond
, N2
);
8258 // The general case for select-of-constants:
8259 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8260 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8261 // leave that to a machine-specific pass.
8265 SDValue
DAGCombiner::visitVSELECT(SDNode
*N
) {
8266 SDValue N0
= N
->getOperand(0);
8267 SDValue N1
= N
->getOperand(1);
8268 SDValue N2
= N
->getOperand(2);
8269 EVT VT
= N
->getValueType(0);
8272 if (SDValue V
= DAG
.simplifySelect(N0
, N1
, N2
))
8275 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8276 if (SDValue F
= extractBooleanFlip(N0
, TLI
))
8277 return DAG
.getSelect(DL
, VT
, F
, N2
, N1
);
8279 // Canonicalize integer abs.
8280 // vselect (setg[te] X, 0), X, -X ->
8281 // vselect (setgt X, -1), X, -X ->
8282 // vselect (setl[te] X, 0), -X, X ->
8283 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8284 if (N0
.getOpcode() == ISD::SETCC
) {
8285 SDValue LHS
= N0
.getOperand(0), RHS
= N0
.getOperand(1);
8286 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N0
.getOperand(2))->get();
8288 bool RHSIsAllZeros
= ISD::isBuildVectorAllZeros(RHS
.getNode());
8290 if (((RHSIsAllZeros
&& (CC
== ISD::SETGT
|| CC
== ISD::SETGE
)) ||
8291 (ISD::isBuildVectorAllOnes(RHS
.getNode()) && CC
== ISD::SETGT
)) &&
8292 N1
== LHS
&& N2
.getOpcode() == ISD::SUB
&& N1
== N2
.getOperand(1))
8293 isAbs
= ISD::isBuildVectorAllZeros(N2
.getOperand(0).getNode());
8294 else if ((RHSIsAllZeros
&& (CC
== ISD::SETLT
|| CC
== ISD::SETLE
)) &&
8295 N2
== LHS
&& N1
.getOpcode() == ISD::SUB
&& N2
== N1
.getOperand(1))
8296 isAbs
= ISD::isBuildVectorAllZeros(N1
.getOperand(0).getNode());
8299 EVT VT
= LHS
.getValueType();
8300 if (TLI
.isOperationLegalOrCustom(ISD::ABS
, VT
))
8301 return DAG
.getNode(ISD::ABS
, DL
, VT
, LHS
);
8303 SDValue Shift
= DAG
.getNode(
8304 ISD::SRA
, DL
, VT
, LHS
,
8305 DAG
.getConstant(VT
.getScalarSizeInBits() - 1, DL
, VT
));
8306 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, VT
, LHS
, Shift
);
8307 AddToWorklist(Shift
.getNode());
8308 AddToWorklist(Add
.getNode());
8309 return DAG
.getNode(ISD::XOR
, DL
, VT
, Add
, Shift
);
8312 // vselect x, y (fcmp lt x, y) -> fminnum x, y
8313 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8315 // This is OK if we don't care about what happens if either operand is a
8318 if (N0
.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG
, N0
.getOperand(0),
8319 N0
.getOperand(1), TLI
)) {
8320 if (SDValue FMinMax
= combineMinNumMaxNum(
8321 DL
, VT
, N0
.getOperand(0), N0
.getOperand(1), N1
, N2
, CC
, TLI
, DAG
))
8325 // If this select has a condition (setcc) with narrower operands than the
8326 // select, try to widen the compare to match the select width.
8327 // TODO: This should be extended to handle any constant.
8328 // TODO: This could be extended to handle non-loading patterns, but that
8329 // requires thorough testing to avoid regressions.
8330 if (isNullOrNullSplat(RHS
)) {
8331 EVT NarrowVT
= LHS
.getValueType();
8332 EVT WideVT
= N1
.getValueType().changeVectorElementTypeToInteger();
8333 EVT SetCCVT
= getSetCCResultType(LHS
.getValueType());
8334 unsigned SetCCWidth
= SetCCVT
.getScalarSizeInBits();
8335 unsigned WideWidth
= WideVT
.getScalarSizeInBits();
8336 bool IsSigned
= isSignedIntSetCC(CC
);
8337 auto LoadExtOpcode
= IsSigned
? ISD::SEXTLOAD
: ISD::ZEXTLOAD
;
8338 if (LHS
.getOpcode() == ISD::LOAD
&& LHS
.hasOneUse() &&
8339 SetCCWidth
!= 1 && SetCCWidth
< WideWidth
&&
8340 TLI
.isLoadExtLegalOrCustom(LoadExtOpcode
, WideVT
, NarrowVT
) &&
8341 TLI
.isOperationLegalOrCustom(ISD::SETCC
, WideVT
)) {
8342 // Both compare operands can be widened for free. The LHS can use an
8343 // extended load, and the RHS is a constant:
8344 // vselect (ext (setcc load(X), C)), N1, N2 -->
8345 // vselect (setcc extload(X), C'), N1, N2
8346 auto ExtOpcode
= IsSigned
? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
8347 SDValue WideLHS
= DAG
.getNode(ExtOpcode
, DL
, WideVT
, LHS
);
8348 SDValue WideRHS
= DAG
.getNode(ExtOpcode
, DL
, WideVT
, RHS
);
8349 EVT WideSetCCVT
= getSetCCResultType(WideVT
);
8350 SDValue WideSetCC
= DAG
.getSetCC(DL
, WideSetCCVT
, WideLHS
, WideRHS
, CC
);
8351 return DAG
.getSelect(DL
, N1
.getValueType(), WideSetCC
, N1
, N2
);
8356 if (SimplifySelectOps(N
, N1
, N2
))
8357 return SDValue(N
, 0); // Don't revisit N.
8359 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8360 if (ISD::isBuildVectorAllOnes(N0
.getNode()))
8362 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8363 if (ISD::isBuildVectorAllZeros(N0
.getNode()))
8366 // The ConvertSelectToConcatVector function is assuming both the above
8367 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8369 if (N1
.getOpcode() == ISD::CONCAT_VECTORS
&&
8370 N2
.getOpcode() == ISD::CONCAT_VECTORS
&&
8371 ISD::isBuildVectorOfConstantSDNodes(N0
.getNode())) {
8372 if (SDValue CV
= ConvertSelectToConcatVector(N
, DAG
))
8376 if (SDValue V
= foldVSelectOfConstants(N
))
8382 SDValue
DAGCombiner::visitSELECT_CC(SDNode
*N
) {
8383 SDValue N0
= N
->getOperand(0);
8384 SDValue N1
= N
->getOperand(1);
8385 SDValue N2
= N
->getOperand(2);
8386 SDValue N3
= N
->getOperand(3);
8387 SDValue N4
= N
->getOperand(4);
8388 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N4
)->get();
8390 // fold select_cc lhs, rhs, x, x, cc -> x
8394 // Determine if the condition we're dealing with is constant
8395 if (SDValue SCC
= SimplifySetCC(getSetCCResultType(N0
.getValueType()), N0
, N1
,
8396 CC
, SDLoc(N
), false)) {
8397 AddToWorklist(SCC
.getNode());
8399 if (ConstantSDNode
*SCCC
= dyn_cast
<ConstantSDNode
>(SCC
.getNode())) {
8400 if (!SCCC
->isNullValue())
8401 return N2
; // cond always true -> true val
8403 return N3
; // cond always false -> false val
8404 } else if (SCC
->isUndef()) {
8405 // When the condition is UNDEF, just return the first operand. This is
8406 // coherent the DAG creation, no setcc node is created in this case
8408 } else if (SCC
.getOpcode() == ISD::SETCC
) {
8409 // Fold to a simpler select_cc
8410 return DAG
.getNode(ISD::SELECT_CC
, SDLoc(N
), N2
.getValueType(),
8411 SCC
.getOperand(0), SCC
.getOperand(1), N2
, N3
,
8416 // If we can fold this based on the true/false value, do so.
8417 if (SimplifySelectOps(N
, N2
, N3
))
8418 return SDValue(N
, 0); // Don't revisit N.
8420 // fold select_cc into other things, such as min/max/abs
8421 return SimplifySelectCC(SDLoc(N
), N0
, N1
, N2
, N3
, CC
);
8424 SDValue
DAGCombiner::visitSETCC(SDNode
*N
) {
8425 // setcc is very commonly used as an argument to brcond. This pattern
8426 // also lend itself to numerous combines and, as a result, it is desired
8427 // we keep the argument to a brcond as a setcc as much as possible.
8429 N
->hasOneUse() && N
->use_begin()->getOpcode() == ISD::BRCOND
;
8431 SDValue Combined
= SimplifySetCC(
8432 N
->getValueType(0), N
->getOperand(0), N
->getOperand(1),
8433 cast
<CondCodeSDNode
>(N
->getOperand(2))->get(), SDLoc(N
), !PreferSetCC
);
8438 // If we prefer to have a setcc, and we don't, we'll try our best to
8439 // recreate one using rebuildSetCC.
8440 if (PreferSetCC
&& Combined
.getOpcode() != ISD::SETCC
) {
8441 SDValue NewSetCC
= rebuildSetCC(Combined
);
8443 // We don't have anything interesting to combine to.
8444 if (NewSetCC
.getNode() == N
)
8454 SDValue
DAGCombiner::visitSETCCCARRY(SDNode
*N
) {
8455 SDValue LHS
= N
->getOperand(0);
8456 SDValue RHS
= N
->getOperand(1);
8457 SDValue Carry
= N
->getOperand(2);
8458 SDValue Cond
= N
->getOperand(3);
8460 // If Carry is false, fold to a regular SETCC.
8461 if (isNullConstant(Carry
))
8462 return DAG
.getNode(ISD::SETCC
, SDLoc(N
), N
->getVTList(), LHS
, RHS
, Cond
);
8467 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8468 /// a build_vector of constants.
8469 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8470 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8471 /// Vector extends are not folded if operations are legal; this is to
8472 /// avoid introducing illegal build_vector dag nodes.
8473 static SDValue
tryToFoldExtendOfConstant(SDNode
*N
, const TargetLowering
&TLI
,
8474 SelectionDAG
&DAG
, bool LegalTypes
) {
8475 unsigned Opcode
= N
->getOpcode();
8476 SDValue N0
= N
->getOperand(0);
8477 EVT VT
= N
->getValueType(0);
8479 assert((Opcode
== ISD::SIGN_EXTEND
|| Opcode
== ISD::ZERO_EXTEND
||
8480 Opcode
== ISD::ANY_EXTEND
|| Opcode
== ISD::SIGN_EXTEND_VECTOR_INREG
||
8481 Opcode
== ISD::ZERO_EXTEND_VECTOR_INREG
)
8482 && "Expected EXTEND dag node in input!");
8484 // fold (sext c1) -> c1
8485 // fold (zext c1) -> c1
8486 // fold (aext c1) -> c1
8487 if (isa
<ConstantSDNode
>(N0
))
8488 return DAG
.getNode(Opcode
, SDLoc(N
), VT
, N0
);
8490 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8491 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8492 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8493 EVT SVT
= VT
.getScalarType();
8494 if (!(VT
.isVector() && (!LegalTypes
|| TLI
.isTypeLegal(SVT
)) &&
8495 ISD::isBuildVectorOfConstantSDNodes(N0
.getNode())))
8498 // We can fold this node into a build_vector.
8499 unsigned VTBits
= SVT
.getSizeInBits();
8500 unsigned EVTBits
= N0
->getValueType(0).getScalarSizeInBits();
8501 SmallVector
<SDValue
, 8> Elts
;
8502 unsigned NumElts
= VT
.getVectorNumElements();
8505 // For zero-extensions, UNDEF elements still guarantee to have the upper
8506 // bits set to zero.
8508 Opcode
== ISD::ZERO_EXTEND
|| Opcode
== ISD::ZERO_EXTEND_VECTOR_INREG
;
8510 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
8511 SDValue Op
= N0
.getOperand(i
);
8513 Elts
.push_back(IsZext
? DAG
.getConstant(0, DL
, SVT
) : DAG
.getUNDEF(SVT
));
8518 // Get the constant value and if needed trunc it to the size of the type.
8519 // Nodes like build_vector might have constants wider than the scalar type.
8520 APInt C
= cast
<ConstantSDNode
>(Op
)->getAPIntValue().zextOrTrunc(EVTBits
);
8521 if (Opcode
== ISD::SIGN_EXTEND
|| Opcode
== ISD::SIGN_EXTEND_VECTOR_INREG
)
8522 Elts
.push_back(DAG
.getConstant(C
.sext(VTBits
), DL
, SVT
));
8524 Elts
.push_back(DAG
.getConstant(C
.zext(VTBits
), DL
, SVT
));
8527 return DAG
.getBuildVector(VT
, DL
, Elts
);
8530 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
8531 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
8532 // transformation. Returns true if extension are possible and the above
8533 // mentioned transformation is profitable.
8534 static bool ExtendUsesToFormExtLoad(EVT VT
, SDNode
*N
, SDValue N0
,
8536 SmallVectorImpl
<SDNode
*> &ExtendNodes
,
8537 const TargetLowering
&TLI
) {
8538 bool HasCopyToRegUses
= false;
8539 bool isTruncFree
= TLI
.isTruncateFree(VT
, N0
.getValueType());
8540 for (SDNode::use_iterator UI
= N0
.getNode()->use_begin(),
8541 UE
= N0
.getNode()->use_end();
8546 if (UI
.getUse().getResNo() != N0
.getResNo())
8548 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8549 if (ExtOpc
!= ISD::ANY_EXTEND
&& User
->getOpcode() == ISD::SETCC
) {
8550 ISD::CondCode CC
= cast
<CondCodeSDNode
>(User
->getOperand(2))->get();
8551 if (ExtOpc
== ISD::ZERO_EXTEND
&& ISD::isSignedIntSetCC(CC
))
8552 // Sign bits will be lost after a zext.
8555 for (unsigned i
= 0; i
!= 2; ++i
) {
8556 SDValue UseOp
= User
->getOperand(i
);
8559 if (!isa
<ConstantSDNode
>(UseOp
))
8564 ExtendNodes
.push_back(User
);
8567 // If truncates aren't free and there are users we can't
8568 // extend, it isn't worthwhile.
8571 // Remember if this value is live-out.
8572 if (User
->getOpcode() == ISD::CopyToReg
)
8573 HasCopyToRegUses
= true;
8576 if (HasCopyToRegUses
) {
8577 bool BothLiveOut
= false;
8578 for (SDNode::use_iterator UI
= N
->use_begin(), UE
= N
->use_end();
8580 SDUse
&Use
= UI
.getUse();
8581 if (Use
.getResNo() == 0 && Use
.getUser()->getOpcode() == ISD::CopyToReg
) {
8587 // Both unextended and extended values are live out. There had better be
8588 // a good reason for the transformation.
8589 return ExtendNodes
.size();
8594 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl
<SDNode
*> &SetCCs
,
8595 SDValue OrigLoad
, SDValue ExtLoad
,
8596 ISD::NodeType ExtType
) {
8597 // Extend SetCC uses if necessary.
8599 for (SDNode
*SetCC
: SetCCs
) {
8600 SmallVector
<SDValue
, 4> Ops
;
8602 for (unsigned j
= 0; j
!= 2; ++j
) {
8603 SDValue SOp
= SetCC
->getOperand(j
);
8604 if (SOp
== OrigLoad
)
8605 Ops
.push_back(ExtLoad
);
8607 Ops
.push_back(DAG
.getNode(ExtType
, DL
, ExtLoad
->getValueType(0), SOp
));
8610 Ops
.push_back(SetCC
->getOperand(2));
8611 CombineTo(SetCC
, DAG
.getNode(ISD::SETCC
, DL
, SetCC
->getValueType(0), Ops
));
8615 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8616 SDValue
DAGCombiner::CombineExtLoad(SDNode
*N
) {
8617 SDValue N0
= N
->getOperand(0);
8618 EVT DstVT
= N
->getValueType(0);
8619 EVT SrcVT
= N0
.getValueType();
8621 assert((N
->getOpcode() == ISD::SIGN_EXTEND
||
8622 N
->getOpcode() == ISD::ZERO_EXTEND
) &&
8623 "Unexpected node type (not an extend)!");
8625 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8626 // For example, on a target with legal v4i32, but illegal v8i32, turn:
8627 // (v8i32 (sext (v8i16 (load x))))
8629 // (v8i32 (concat_vectors (v4i32 (sextload x)),
8630 // (v4i32 (sextload (x + 16)))))
8631 // Where uses of the original load, i.e.:
8633 // are replaced with:
8635 // (v8i32 (concat_vectors (v4i32 (sextload x)),
8636 // (v4i32 (sextload (x + 16)))))))
8638 // This combine is only applicable to illegal, but splittable, vectors.
8639 // All legal types, and illegal non-vector types, are handled elsewhere.
8640 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8642 if (N0
->getOpcode() != ISD::LOAD
)
8645 LoadSDNode
*LN0
= cast
<LoadSDNode
>(N0
);
8647 if (!ISD::isNON_EXTLoad(LN0
) || !ISD::isUNINDEXEDLoad(LN0
) ||
8648 !N0
.hasOneUse() || LN0
->isVolatile() || !DstVT
.isVector() ||
8649 !DstVT
.isPow2VectorType() || !TLI
.isVectorLoadExtDesirable(SDValue(N
, 0)))
8652 SmallVector
<SDNode
*, 4> SetCCs
;
8653 if (!ExtendUsesToFormExtLoad(DstVT
, N
, N0
, N
->getOpcode(), SetCCs
, TLI
))
8656 ISD::LoadExtType ExtType
=
8657 N
->getOpcode() == ISD::SIGN_EXTEND
? ISD::SEXTLOAD
: ISD::ZEXTLOAD
;
8659 // Try to split the vector types to get down to legal types.
8660 EVT SplitSrcVT
= SrcVT
;
8661 EVT SplitDstVT
= DstVT
;
8662 while (!TLI
.isLoadExtLegalOrCustom(ExtType
, SplitDstVT
, SplitSrcVT
) &&
8663 SplitSrcVT
.getVectorNumElements() > 1) {
8664 SplitDstVT
= DAG
.GetSplitDestVTs(SplitDstVT
).first
;
8665 SplitSrcVT
= DAG
.GetSplitDestVTs(SplitSrcVT
).first
;
8668 if (!TLI
.isLoadExtLegalOrCustom(ExtType
, SplitDstVT
, SplitSrcVT
))
8672 const unsigned NumSplits
=
8673 DstVT
.getVectorNumElements() / SplitDstVT
.getVectorNumElements();
8674 const unsigned Stride
= SplitSrcVT
.getStoreSize();
8675 SmallVector
<SDValue
, 4> Loads
;
8676 SmallVector
<SDValue
, 4> Chains
;
8678 SDValue BasePtr
= LN0
->getBasePtr();
8679 for (unsigned Idx
= 0; Idx
< NumSplits
; Idx
++) {
8680 const unsigned Offset
= Idx
* Stride
;
8681 const unsigned Align
= MinAlign(LN0
->getAlignment(), Offset
);
8683 SDValue SplitLoad
= DAG
.getExtLoad(
8684 ExtType
, SDLoc(LN0
), SplitDstVT
, LN0
->getChain(), BasePtr
,
8685 LN0
->getPointerInfo().getWithOffset(Offset
), SplitSrcVT
, Align
,
8686 LN0
->getMemOperand()->getFlags(), LN0
->getAAInfo());
8688 BasePtr
= DAG
.getNode(ISD::ADD
, DL
, BasePtr
.getValueType(), BasePtr
,
8689 DAG
.getConstant(Stride
, DL
, BasePtr
.getValueType()));
8691 Loads
.push_back(SplitLoad
.getValue(0));
8692 Chains
.push_back(SplitLoad
.getValue(1));
8695 SDValue NewChain
= DAG
.getNode(ISD::TokenFactor
, DL
, MVT::Other
, Chains
);
8696 SDValue NewValue
= DAG
.getNode(ISD::CONCAT_VECTORS
, DL
, DstVT
, Loads
);
8699 AddToWorklist(NewChain
.getNode());
8701 CombineTo(N
, NewValue
);
8703 // Replace uses of the original load (before extension)
8704 // with a truncate of the concatenated sextloaded vectors.
8706 DAG
.getNode(ISD::TRUNCATE
, SDLoc(N0
), N0
.getValueType(), NewValue
);
8707 ExtendSetCCUses(SetCCs
, N0
, NewValue
, (ISD::NodeType
)N
->getOpcode());
8708 CombineTo(N0
.getNode(), Trunc
, NewChain
);
8709 return SDValue(N
, 0); // Return N so it doesn't get rechecked!
8712 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8713 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8714 SDValue
DAGCombiner::CombineZExtLogicopShiftLoad(SDNode
*N
) {
8715 assert(N
->getOpcode() == ISD::ZERO_EXTEND
);
8716 EVT VT
= N
->getValueType(0);
8717 EVT OrigVT
= N
->getOperand(0).getValueType();
8718 if (TLI
.isZExtFree(OrigVT
, VT
))
8722 SDValue N0
= N
->getOperand(0);
8723 if (!(N0
.getOpcode() == ISD::AND
|| N0
.getOpcode() == ISD::OR
||
8724 N0
.getOpcode() == ISD::XOR
) ||
8725 N0
.getOperand(1).getOpcode() != ISD::Constant
||
8726 (LegalOperations
&& !TLI
.isOperationLegal(N0
.getOpcode(), VT
)))
8730 SDValue N1
= N0
->getOperand(0);
8731 if (!(N1
.getOpcode() == ISD::SHL
|| N1
.getOpcode() == ISD::SRL
) ||
8732 N1
.getOperand(1).getOpcode() != ISD::Constant
||
8733 (LegalOperations
&& !TLI
.isOperationLegal(N1
.getOpcode(), VT
)))
8737 if (!isa
<LoadSDNode
>(N1
.getOperand(0)))
8739 LoadSDNode
*Load
= cast
<LoadSDNode
>(N1
.getOperand(0));
8740 EVT MemVT
= Load
->getMemoryVT();
8741 if (!TLI
.isLoadExtLegal(ISD::ZEXTLOAD
, VT
, MemVT
) ||
8742 Load
->getExtensionType() == ISD::SEXTLOAD
|| Load
->isIndexed())
8746 // If the shift op is SHL, the logic op must be AND, otherwise the result
8748 if (N1
.getOpcode() == ISD::SHL
&& N0
.getOpcode() != ISD::AND
)
8751 if (!N0
.hasOneUse() || !N1
.hasOneUse())
8754 SmallVector
<SDNode
*, 4> SetCCs
;
8755 if (!ExtendUsesToFormExtLoad(VT
, N1
.getNode(), N1
.getOperand(0),
8756 ISD::ZERO_EXTEND
, SetCCs
, TLI
))
8759 // Actually do the transformation.
8760 SDValue ExtLoad
= DAG
.getExtLoad(ISD::ZEXTLOAD
, SDLoc(Load
), VT
,
8761 Load
->getChain(), Load
->getBasePtr(),
8762 Load
->getMemoryVT(), Load
->getMemOperand());
8765 SDValue Shift
= DAG
.getNode(N1
.getOpcode(), DL1
, VT
, ExtLoad
,
8768 APInt Mask
= cast
<ConstantSDNode
>(N0
.getOperand(1))->getAPIntValue();
8769 Mask
= Mask
.zext(VT
.getSizeInBits());
8771 SDValue And
= DAG
.getNode(N0
.getOpcode(), DL0
, VT
, Shift
,
8772 DAG
.getConstant(Mask
, DL0
, VT
));
8774 ExtendSetCCUses(SetCCs
, N1
.getOperand(0), ExtLoad
, ISD::ZERO_EXTEND
);
8776 if (SDValue(Load
, 0).hasOneUse()) {
8777 DAG
.ReplaceAllUsesOfValueWith(SDValue(Load
, 1), ExtLoad
.getValue(1));
8779 SDValue Trunc
= DAG
.getNode(ISD::TRUNCATE
, SDLoc(Load
),
8780 Load
->getValueType(0), ExtLoad
);
8781 CombineTo(Load
, Trunc
, ExtLoad
.getValue(1));
8784 // N0 is dead at this point.
8785 recursivelyDeleteUnusedNodes(N0
.getNode());
8787 return SDValue(N
,0); // Return N so it doesn't get rechecked!
8790 /// If we're narrowing or widening the result of a vector select and the final
8791 /// size is the same size as a setcc (compare) feeding the select, then try to
8792 /// apply the cast operation to the select's operands because matching vector
8793 /// sizes for a select condition and other operands should be more efficient.
8794 SDValue
DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode
*Cast
) {
8795 unsigned CastOpcode
= Cast
->getOpcode();
8796 assert((CastOpcode
== ISD::SIGN_EXTEND
|| CastOpcode
== ISD::ZERO_EXTEND
||
8797 CastOpcode
== ISD::TRUNCATE
|| CastOpcode
== ISD::FP_EXTEND
||
8798 CastOpcode
== ISD::FP_ROUND
) &&
8799 "Unexpected opcode for vector select narrowing/widening");
8801 // We only do this transform before legal ops because the pattern may be
8802 // obfuscated by target-specific operations after legalization. Do not create
8803 // an illegal select op, however, because that may be difficult to lower.
8804 EVT VT
= Cast
->getValueType(0);
8805 if (LegalOperations
|| !TLI
.isOperationLegalOrCustom(ISD::VSELECT
, VT
))
8808 SDValue VSel
= Cast
->getOperand(0);
8809 if (VSel
.getOpcode() != ISD::VSELECT
|| !VSel
.hasOneUse() ||
8810 VSel
.getOperand(0).getOpcode() != ISD::SETCC
)
8813 // Does the setcc have the same vector size as the casted select?
8814 SDValue SetCC
= VSel
.getOperand(0);
8815 EVT SetCCVT
= getSetCCResultType(SetCC
.getOperand(0).getValueType());
8816 if (SetCCVT
.getSizeInBits() != VT
.getSizeInBits())
8819 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8820 SDValue A
= VSel
.getOperand(1);
8821 SDValue B
= VSel
.getOperand(2);
8822 SDValue CastA
, CastB
;
8824 if (CastOpcode
== ISD::FP_ROUND
) {
8825 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8826 CastA
= DAG
.getNode(CastOpcode
, DL
, VT
, A
, Cast
->getOperand(1));
8827 CastB
= DAG
.getNode(CastOpcode
, DL
, VT
, B
, Cast
->getOperand(1));
8829 CastA
= DAG
.getNode(CastOpcode
, DL
, VT
, A
);
8830 CastB
= DAG
.getNode(CastOpcode
, DL
, VT
, B
);
8832 return DAG
.getNode(ISD::VSELECT
, DL
, VT
, SetCC
, CastA
, CastB
);
8835 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8836 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8837 static SDValue
tryToFoldExtOfExtload(SelectionDAG
&DAG
, DAGCombiner
&Combiner
,
8838 const TargetLowering
&TLI
, EVT VT
,
8839 bool LegalOperations
, SDNode
*N
,
8840 SDValue N0
, ISD::LoadExtType ExtLoadType
) {
8841 SDNode
*N0Node
= N0
.getNode();
8842 bool isAExtLoad
= (ExtLoadType
== ISD::SEXTLOAD
) ? ISD::isSEXTLoad(N0Node
)
8843 : ISD::isZEXTLoad(N0Node
);
8844 if ((!isAExtLoad
&& !ISD::isEXTLoad(N0Node
)) ||
8845 !ISD::isUNINDEXEDLoad(N0Node
) || !N0
.hasOneUse())
8848 LoadSDNode
*LN0
= cast
<LoadSDNode
>(N0
);
8849 EVT MemVT
= LN0
->getMemoryVT();
8850 if ((LegalOperations
|| LN0
->isVolatile() || VT
.isVector()) &&
8851 !TLI
.isLoadExtLegal(ExtLoadType
, VT
, MemVT
))
8855 DAG
.getExtLoad(ExtLoadType
, SDLoc(LN0
), VT
, LN0
->getChain(),
8856 LN0
->getBasePtr(), MemVT
, LN0
->getMemOperand());
8857 Combiner
.CombineTo(N
, ExtLoad
);
8858 DAG
.ReplaceAllUsesOfValueWith(SDValue(LN0
, 1), ExtLoad
.getValue(1));
8859 if (LN0
->use_empty())
8860 Combiner
.recursivelyDeleteUnusedNodes(LN0
);
8861 return SDValue(N
, 0); // Return N so it doesn't get rechecked!
8864 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8865 // Only generate vector extloads when 1) they're legal, and 2) they are
8866 // deemed desirable by the target.
8867 static SDValue
tryToFoldExtOfLoad(SelectionDAG
&DAG
, DAGCombiner
&Combiner
,
8868 const TargetLowering
&TLI
, EVT VT
,
8869 bool LegalOperations
, SDNode
*N
, SDValue N0
,
8870 ISD::LoadExtType ExtLoadType
,
8871 ISD::NodeType ExtOpc
) {
8872 if (!ISD::isNON_EXTLoad(N0
.getNode()) ||
8873 !ISD::isUNINDEXEDLoad(N0
.getNode()) ||
8874 ((LegalOperations
|| VT
.isVector() ||
8875 cast
<LoadSDNode
>(N0
)->isVolatile()) &&
8876 !TLI
.isLoadExtLegal(ExtLoadType
, VT
, N0
.getValueType())))
8879 bool DoXform
= true;
8880 SmallVector
<SDNode
*, 4> SetCCs
;
8881 if (!N0
.hasOneUse())
8882 DoXform
= ExtendUsesToFormExtLoad(VT
, N
, N0
, ExtOpc
, SetCCs
, TLI
);
8884 DoXform
&= TLI
.isVectorLoadExtDesirable(SDValue(N
, 0));
8888 LoadSDNode
*LN0
= cast
<LoadSDNode
>(N0
);
8889 SDValue ExtLoad
= DAG
.getExtLoad(ExtLoadType
, SDLoc(LN0
), VT
, LN0
->getChain(),
8890 LN0
->getBasePtr(), N0
.getValueType(),
8891 LN0
->getMemOperand());
8892 Combiner
.ExtendSetCCUses(SetCCs
, N0
, ExtLoad
, ExtOpc
);
8893 // If the load value is used only by N, replace it via CombineTo N.
8894 bool NoReplaceTrunc
= SDValue(LN0
, 0).hasOneUse();
8895 Combiner
.CombineTo(N
, ExtLoad
);
8896 if (NoReplaceTrunc
) {
8897 DAG
.ReplaceAllUsesOfValueWith(SDValue(LN0
, 1), ExtLoad
.getValue(1));
8898 Combiner
.recursivelyDeleteUnusedNodes(LN0
);
8901 DAG
.getNode(ISD::TRUNCATE
, SDLoc(N0
), N0
.getValueType(), ExtLoad
);
8902 Combiner
.CombineTo(LN0
, Trunc
, ExtLoad
.getValue(1));
8904 return SDValue(N
, 0); // Return N so it doesn't get rechecked!
8907 static SDValue
foldExtendedSignBitTest(SDNode
*N
, SelectionDAG
&DAG
,
8908 bool LegalOperations
) {
8909 assert((N
->getOpcode() == ISD::SIGN_EXTEND
||
8910 N
->getOpcode() == ISD::ZERO_EXTEND
) && "Expected sext or zext");
8912 SDValue SetCC
= N
->getOperand(0);
8913 if (LegalOperations
|| SetCC
.getOpcode() != ISD::SETCC
||
8914 !SetCC
.hasOneUse() || SetCC
.getValueType() != MVT::i1
)
8917 SDValue X
= SetCC
.getOperand(0);
8918 SDValue Ones
= SetCC
.getOperand(1);
8919 ISD::CondCode CC
= cast
<CondCodeSDNode
>(SetCC
.getOperand(2))->get();
8920 EVT VT
= N
->getValueType(0);
8921 EVT XVT
= X
.getValueType();
8922 // setge X, C is canonicalized to setgt, so we do not need to match that
8923 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8924 // not require the 'not' op.
8925 if (CC
== ISD::SETGT
&& isAllOnesConstant(Ones
) && VT
== XVT
) {
8926 // Invert and smear/shift the sign bit:
8927 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8928 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8930 SDValue NotX
= DAG
.getNOT(DL
, X
, VT
);
8931 SDValue ShiftAmount
= DAG
.getConstant(VT
.getSizeInBits() - 1, DL
, VT
);
8932 auto ShiftOpcode
= N
->getOpcode() == ISD::SIGN_EXTEND
? ISD::SRA
: ISD::SRL
;
8933 return DAG
.getNode(ShiftOpcode
, DL
, VT
, NotX
, ShiftAmount
);
8938 SDValue
DAGCombiner::visitSIGN_EXTEND(SDNode
*N
) {
8939 SDValue N0
= N
->getOperand(0);
8940 EVT VT
= N
->getValueType(0);
8943 if (SDValue Res
= tryToFoldExtendOfConstant(N
, TLI
, DAG
, LegalTypes
))
8946 // fold (sext (sext x)) -> (sext x)
8947 // fold (sext (aext x)) -> (sext x)
8948 if (N0
.getOpcode() == ISD::SIGN_EXTEND
|| N0
.getOpcode() == ISD::ANY_EXTEND
)
8949 return DAG
.getNode(ISD::SIGN_EXTEND
, DL
, VT
, N0
.getOperand(0));
8951 if (N0
.getOpcode() == ISD::TRUNCATE
) {
8952 // fold (sext (truncate (load x))) -> (sext (smaller load x))
8953 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8954 if (SDValue NarrowLoad
= ReduceLoadWidth(N0
.getNode())) {
8955 SDNode
*oye
= N0
.getOperand(0).getNode();
8956 if (NarrowLoad
.getNode() != N0
.getNode()) {
8957 CombineTo(N0
.getNode(), NarrowLoad
);
8958 // CombineTo deleted the truncate, if needed, but not what's under it.
8961 return SDValue(N
, 0); // Return N so it doesn't get rechecked!
8964 // See if the value being truncated is already sign extended. If so, just
8965 // eliminate the trunc/sext pair.
8966 SDValue Op
= N0
.getOperand(0);
8967 unsigned OpBits
= Op
.getScalarValueSizeInBits();
8968 unsigned MidBits
= N0
.getScalarValueSizeInBits();
8969 unsigned DestBits
= VT
.getScalarSizeInBits();
8970 unsigned NumSignBits
= DAG
.ComputeNumSignBits(Op
);
8972 if (OpBits
== DestBits
) {
8973 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
8974 // bits, it is already ready.
8975 if (NumSignBits
> DestBits
-MidBits
)
8977 } else if (OpBits
< DestBits
) {
8978 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
8979 // bits, just sext from i32.
8980 if (NumSignBits
> OpBits
-MidBits
)
8981 return DAG
.getNode(ISD::SIGN_EXTEND
, DL
, VT
, Op
);
8983 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
8984 // bits, just truncate to i32.
8985 if (NumSignBits
> OpBits
-MidBits
)
8986 return DAG
.getNode(ISD::TRUNCATE
, DL
, VT
, Op
);
8989 // fold (sext (truncate x)) -> (sextinreg x).
8990 if (!LegalOperations
|| TLI
.isOperationLegal(ISD::SIGN_EXTEND_INREG
,
8991 N0
.getValueType())) {
8992 if (OpBits
< DestBits
)
8993 Op
= DAG
.getNode(ISD::ANY_EXTEND
, SDLoc(N0
), VT
, Op
);
8994 else if (OpBits
> DestBits
)
8995 Op
= DAG
.getNode(ISD::TRUNCATE
, SDLoc(N0
), VT
, Op
);
8996 return DAG
.getNode(ISD::SIGN_EXTEND_INREG
, DL
, VT
, Op
,
8997 DAG
.getValueType(N0
.getValueType()));
9001 // Try to simplify (sext (load x)).
9002 if (SDValue foldedExt
=
9003 tryToFoldExtOfLoad(DAG
, *this, TLI
, VT
, LegalOperations
, N
, N0
,
9004 ISD::SEXTLOAD
, ISD::SIGN_EXTEND
))
9007 // fold (sext (load x)) to multiple smaller sextloads.
9008 // Only on illegal but splittable vectors.
9009 if (SDValue ExtLoad
= CombineExtLoad(N
))
9012 // Try to simplify (sext (sextload x)).
9013 if (SDValue foldedExt
= tryToFoldExtOfExtload(
9014 DAG
, *this, TLI
, VT
, LegalOperations
, N
, N0
, ISD::SEXTLOAD
))
9017 // fold (sext (and/or/xor (load x), cst)) ->
9018 // (and/or/xor (sextload x), (sext cst))
9019 if ((N0
.getOpcode() == ISD::AND
|| N0
.getOpcode() == ISD::OR
||
9020 N0
.getOpcode() == ISD::XOR
) &&
9021 isa
<LoadSDNode
>(N0
.getOperand(0)) &&
9022 N0
.getOperand(1).getOpcode() == ISD::Constant
&&
9023 (!LegalOperations
&& TLI
.isOperationLegal(N0
.getOpcode(), VT
))) {
9024 LoadSDNode
*LN00
= cast
<LoadSDNode
>(N0
.getOperand(0));
9025 EVT MemVT
= LN00
->getMemoryVT();
9026 if (TLI
.isLoadExtLegal(ISD::SEXTLOAD
, VT
, MemVT
) &&
9027 LN00
->getExtensionType() != ISD::ZEXTLOAD
&& LN00
->isUnindexed()) {
9028 SmallVector
<SDNode
*, 4> SetCCs
;
9029 bool DoXform
= ExtendUsesToFormExtLoad(VT
, N0
.getNode(), N0
.getOperand(0),
9030 ISD::SIGN_EXTEND
, SetCCs
, TLI
);
9032 SDValue ExtLoad
= DAG
.getExtLoad(ISD::SEXTLOAD
, SDLoc(LN00
), VT
,
9033 LN00
->getChain(), LN00
->getBasePtr(),
9034 LN00
->getMemoryVT(),
9035 LN00
->getMemOperand());
9036 APInt Mask
= cast
<ConstantSDNode
>(N0
.getOperand(1))->getAPIntValue();
9037 Mask
= Mask
.sext(VT
.getSizeInBits());
9038 SDValue And
= DAG
.getNode(N0
.getOpcode(), DL
, VT
,
9039 ExtLoad
, DAG
.getConstant(Mask
, DL
, VT
));
9040 ExtendSetCCUses(SetCCs
, N0
.getOperand(0), ExtLoad
, ISD::SIGN_EXTEND
);
9041 bool NoReplaceTruncAnd
= !N0
.hasOneUse();
9042 bool NoReplaceTrunc
= SDValue(LN00
, 0).hasOneUse();
9044 // If N0 has multiple uses, change other uses as well.
9045 if (NoReplaceTruncAnd
) {
9047 DAG
.getNode(ISD::TRUNCATE
, DL
, N0
.getValueType(), And
);
9048 CombineTo(N0
.getNode(), TruncAnd
);
9050 if (NoReplaceTrunc
) {
9051 DAG
.ReplaceAllUsesOfValueWith(SDValue(LN00
, 1), ExtLoad
.getValue(1));
9053 SDValue Trunc
= DAG
.getNode(ISD::TRUNCATE
, SDLoc(LN00
),
9054 LN00
->getValueType(0), ExtLoad
);
9055 CombineTo(LN00
, Trunc
, ExtLoad
.getValue(1));
9057 return SDValue(N
,0); // Return N so it doesn't get rechecked!
9062 if (SDValue V
= foldExtendedSignBitTest(N
, DAG
, LegalOperations
))
9065 if (N0
.getOpcode() == ISD::SETCC
) {
9066 SDValue N00
= N0
.getOperand(0);
9067 SDValue N01
= N0
.getOperand(1);
9068 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N0
.getOperand(2))->get();
9069 EVT N00VT
= N0
.getOperand(0).getValueType();
9071 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9072 // Only do this before legalize for now.
9073 if (VT
.isVector() && !LegalOperations
&&
9074 TLI
.getBooleanContents(N00VT
) ==
9075 TargetLowering::ZeroOrNegativeOneBooleanContent
) {
9076 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9077 // of the same size as the compared operands. Only optimize sext(setcc())
9078 // if this is the case.
9079 EVT SVT
= getSetCCResultType(N00VT
);
9081 // If we already have the desired type, don't change it.
9082 if (SVT
!= N0
.getValueType()) {
9083 // We know that the # elements of the results is the same as the
9084 // # elements of the compare (and the # elements of the compare result
9085 // for that matter). Check to see that they are the same size. If so,
9086 // we know that the element size of the sext'd result matches the
9087 // element size of the compare operands.
9088 if (VT
.getSizeInBits() == SVT
.getSizeInBits())
9089 return DAG
.getSetCC(DL
, VT
, N00
, N01
, CC
);
9091 // If the desired elements are smaller or larger than the source
9092 // elements, we can use a matching integer vector type and then
9093 // truncate/sign extend.
9094 EVT MatchingVecType
= N00VT
.changeVectorElementTypeToInteger();
9095 if (SVT
== MatchingVecType
) {
9096 SDValue VsetCC
= DAG
.getSetCC(DL
, MatchingVecType
, N00
, N01
, CC
);
9097 return DAG
.getSExtOrTrunc(VsetCC
, DL
, VT
);
9102 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9103 // Here, T can be 1 or -1, depending on the type of the setcc and
9104 // getBooleanContents().
9105 unsigned SetCCWidth
= N0
.getScalarValueSizeInBits();
9107 // To determine the "true" side of the select, we need to know the high bit
9108 // of the value returned by the setcc if it evaluates to true.
9109 // If the type of the setcc is i1, then the true case of the select is just
9110 // sext(i1 1), that is, -1.
9111 // If the type of the setcc is larger (say, i8) then the value of the high
9112 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9113 // of the appropriate width.
9114 SDValue ExtTrueVal
= (SetCCWidth
== 1)
9115 ? DAG
.getAllOnesConstant(DL
, VT
)
9116 : DAG
.getBoolConstant(true, DL
, VT
, N00VT
);
9117 SDValue Zero
= DAG
.getConstant(0, DL
, VT
);
9119 SimplifySelectCC(DL
, N00
, N01
, ExtTrueVal
, Zero
, CC
, true))
9122 if (!VT
.isVector() && !TLI
.convertSelectOfConstantsToMath(VT
)) {
9123 EVT SetCCVT
= getSetCCResultType(N00VT
);
9124 // Don't do this transform for i1 because there's a select transform
9125 // that would reverse it.
9126 // TODO: We should not do this transform at all without a target hook
9127 // because a sext is likely cheaper than a select?
9128 if (SetCCVT
.getScalarSizeInBits() != 1 &&
9129 (!LegalOperations
|| TLI
.isOperationLegal(ISD::SETCC
, N00VT
))) {
9130 SDValue SetCC
= DAG
.getSetCC(DL
, SetCCVT
, N00
, N01
, CC
);
9131 return DAG
.getSelect(DL
, VT
, SetCC
, ExtTrueVal
, Zero
);
9136 // fold (sext x) -> (zext x) if the sign bit is known zero.
9137 if ((!LegalOperations
|| TLI
.isOperationLegal(ISD::ZERO_EXTEND
, VT
)) &&
9138 DAG
.SignBitIsZero(N0
))
9139 return DAG
.getNode(ISD::ZERO_EXTEND
, DL
, VT
, N0
);
9141 if (SDValue NewVSel
= matchVSelectOpSizesWithSetCC(N
))
9144 // Eliminate this sign extend by doing a negation in the destination type:
9145 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9146 if (N0
.getOpcode() == ISD::SUB
&& N0
.hasOneUse() &&
9147 isNullOrNullSplat(N0
.getOperand(0)) &&
9148 N0
.getOperand(1).getOpcode() == ISD::ZERO_EXTEND
&&
9149 TLI
.isOperationLegalOrCustom(ISD::SUB
, VT
)) {
9150 SDValue Zext
= DAG
.getZExtOrTrunc(N0
.getOperand(1).getOperand(0), DL
, VT
);
9151 return DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, DL
, VT
), Zext
);
9153 // Eliminate this sign extend by doing a decrement in the destination type:
9154 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9155 if (N0
.getOpcode() == ISD::ADD
&& N0
.hasOneUse() &&
9156 isAllOnesOrAllOnesSplat(N0
.getOperand(1)) &&
9157 N0
.getOperand(0).getOpcode() == ISD::ZERO_EXTEND
&&
9158 TLI
.isOperationLegalOrCustom(ISD::ADD
, VT
)) {
9159 SDValue Zext
= DAG
.getZExtOrTrunc(N0
.getOperand(0).getOperand(0), DL
, VT
);
9160 return DAG
.getNode(ISD::ADD
, DL
, VT
, Zext
, DAG
.getAllOnesConstant(DL
, VT
));
9166 // isTruncateOf - If N is a truncate of some other value, return true, record
9167 // the value being truncated in Op and which of Op's bits are zero/one in Known.
9168 // This function computes KnownBits to avoid a duplicated call to
9169 // computeKnownBits in the caller.
9170 static bool isTruncateOf(SelectionDAG
&DAG
, SDValue N
, SDValue
&Op
,
9172 if (N
->getOpcode() == ISD::TRUNCATE
) {
9173 Op
= N
->getOperand(0);
9174 Known
= DAG
.computeKnownBits(Op
);
9178 if (N
.getOpcode() != ISD::SETCC
||
9179 N
.getValueType().getScalarType() != MVT::i1
||
9180 cast
<CondCodeSDNode
>(N
.getOperand(2))->get() != ISD::SETNE
)
9183 SDValue Op0
= N
->getOperand(0);
9184 SDValue Op1
= N
->getOperand(1);
9185 assert(Op0
.getValueType() == Op1
.getValueType());
9187 if (isNullOrNullSplat(Op0
))
9189 else if (isNullOrNullSplat(Op1
))
9194 Known
= DAG
.computeKnownBits(Op
);
9196 return (Known
.Zero
| 1).isAllOnesValue();
9199 SDValue
DAGCombiner::visitZERO_EXTEND(SDNode
*N
) {
9200 SDValue N0
= N
->getOperand(0);
9201 EVT VT
= N
->getValueType(0);
9203 if (SDValue Res
= tryToFoldExtendOfConstant(N
, TLI
, DAG
, LegalTypes
))
9206 // fold (zext (zext x)) -> (zext x)
9207 // fold (zext (aext x)) -> (zext x)
9208 if (N0
.getOpcode() == ISD::ZERO_EXTEND
|| N0
.getOpcode() == ISD::ANY_EXTEND
)
9209 return DAG
.getNode(ISD::ZERO_EXTEND
, SDLoc(N
), VT
,
9212 // fold (zext (truncate x)) -> (zext x) or
9213 // (zext (truncate x)) -> (truncate x)
9214 // This is valid when the truncated bits of x are already zero.
9217 if (isTruncateOf(DAG
, N0
, Op
, Known
)) {
9218 APInt TruncatedBits
=
9219 (Op
.getScalarValueSizeInBits() == N0
.getScalarValueSizeInBits()) ?
9220 APInt(Op
.getScalarValueSizeInBits(), 0) :
9221 APInt::getBitsSet(Op
.getScalarValueSizeInBits(),
9222 N0
.getScalarValueSizeInBits(),
9223 std::min(Op
.getScalarValueSizeInBits(),
9224 VT
.getScalarSizeInBits()));
9225 if (TruncatedBits
.isSubsetOf(Known
.Zero
))
9226 return DAG
.getZExtOrTrunc(Op
, SDLoc(N
), VT
);
9229 // fold (zext (truncate x)) -> (and x, mask)
9230 if (N0
.getOpcode() == ISD::TRUNCATE
) {
9231 // fold (zext (truncate (load x))) -> (zext (smaller load x))
9232 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9233 if (SDValue NarrowLoad
= ReduceLoadWidth(N0
.getNode())) {
9234 SDNode
*oye
= N0
.getOperand(0).getNode();
9235 if (NarrowLoad
.getNode() != N0
.getNode()) {
9236 CombineTo(N0
.getNode(), NarrowLoad
);
9237 // CombineTo deleted the truncate, if needed, but not what's under it.
9240 return SDValue(N
, 0); // Return N so it doesn't get rechecked!
9243 EVT SrcVT
= N0
.getOperand(0).getValueType();
9244 EVT MinVT
= N0
.getValueType();
9246 // Try to mask before the extension to avoid having to generate a larger mask,
9247 // possibly over several sub-vectors.
9248 if (SrcVT
.bitsLT(VT
) && VT
.isVector()) {
9249 if (!LegalOperations
|| (TLI
.isOperationLegal(ISD::AND
, SrcVT
) &&
9250 TLI
.isOperationLegal(ISD::ZERO_EXTEND
, VT
))) {
9251 SDValue Op
= N0
.getOperand(0);
9252 Op
= DAG
.getZeroExtendInReg(Op
, SDLoc(N
), MinVT
.getScalarType());
9253 AddToWorklist(Op
.getNode());
9254 SDValue ZExtOrTrunc
= DAG
.getZExtOrTrunc(Op
, SDLoc(N
), VT
);
9255 // Transfer the debug info; the new node is equivalent to N0.
9256 DAG
.transferDbgValues(N0
, ZExtOrTrunc
);
9261 if (!LegalOperations
|| TLI
.isOperationLegal(ISD::AND
, VT
)) {
9262 SDValue Op
= DAG
.getAnyExtOrTrunc(N0
.getOperand(0), SDLoc(N
), VT
);
9263 AddToWorklist(Op
.getNode());
9264 SDValue And
= DAG
.getZeroExtendInReg(Op
, SDLoc(N
), MinVT
.getScalarType());
9265 // We may safely transfer the debug info describing the truncate node over
9266 // to the equivalent and operation.
9267 DAG
.transferDbgValues(N0
, And
);
9272 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9273 // if either of the casts is not free.
9274 if (N0
.getOpcode() == ISD::AND
&&
9275 N0
.getOperand(0).getOpcode() == ISD::TRUNCATE
&&
9276 N0
.getOperand(1).getOpcode() == ISD::Constant
&&
9277 (!TLI
.isTruncateFree(N0
.getOperand(0).getOperand(0).getValueType(),
9278 N0
.getValueType()) ||
9279 !TLI
.isZExtFree(N0
.getValueType(), VT
))) {
9280 SDValue X
= N0
.getOperand(0).getOperand(0);
9281 X
= DAG
.getAnyExtOrTrunc(X
, SDLoc(X
), VT
);
9282 APInt Mask
= cast
<ConstantSDNode
>(N0
.getOperand(1))->getAPIntValue();
9283 Mask
= Mask
.zext(VT
.getSizeInBits());
9285 return DAG
.getNode(ISD::AND
, DL
, VT
,
9286 X
, DAG
.getConstant(Mask
, DL
, VT
));
9289 // Try to simplify (zext (load x)).
9290 if (SDValue foldedExt
=
9291 tryToFoldExtOfLoad(DAG
, *this, TLI
, VT
, LegalOperations
, N
, N0
,
9292 ISD::ZEXTLOAD
, ISD::ZERO_EXTEND
))
9295 // fold (zext (load x)) to multiple smaller zextloads.
9296 // Only on illegal but splittable vectors.
9297 if (SDValue ExtLoad
= CombineExtLoad(N
))
9300 // fold (zext (and/or/xor (load x), cst)) ->
9301 // (and/or/xor (zextload x), (zext cst))
9302 // Unless (and (load x) cst) will match as a zextload already and has
9303 // additional users.
9304 if ((N0
.getOpcode() == ISD::AND
|| N0
.getOpcode() == ISD::OR
||
9305 N0
.getOpcode() == ISD::XOR
) &&
9306 isa
<LoadSDNode
>(N0
.getOperand(0)) &&
9307 N0
.getOperand(1).getOpcode() == ISD::Constant
&&
9308 (!LegalOperations
&& TLI
.isOperationLegal(N0
.getOpcode(), VT
))) {
9309 LoadSDNode
*LN00
= cast
<LoadSDNode
>(N0
.getOperand(0));
9310 EVT MemVT
= LN00
->getMemoryVT();
9311 if (TLI
.isLoadExtLegal(ISD::ZEXTLOAD
, VT
, MemVT
) &&
9312 LN00
->getExtensionType() != ISD::SEXTLOAD
&& LN00
->isUnindexed()) {
9313 bool DoXform
= true;
9314 SmallVector
<SDNode
*, 4> SetCCs
;
9315 if (!N0
.hasOneUse()) {
9316 if (N0
.getOpcode() == ISD::AND
) {
9317 auto *AndC
= cast
<ConstantSDNode
>(N0
.getOperand(1));
9318 EVT LoadResultTy
= AndC
->getValueType(0);
9320 if (isAndLoadExtLoad(AndC
, LN00
, LoadResultTy
, ExtVT
))
9325 DoXform
= ExtendUsesToFormExtLoad(VT
, N0
.getNode(), N0
.getOperand(0),
9326 ISD::ZERO_EXTEND
, SetCCs
, TLI
);
9328 SDValue ExtLoad
= DAG
.getExtLoad(ISD::ZEXTLOAD
, SDLoc(LN00
), VT
,
9329 LN00
->getChain(), LN00
->getBasePtr(),
9330 LN00
->getMemoryVT(),
9331 LN00
->getMemOperand());
9332 APInt Mask
= cast
<ConstantSDNode
>(N0
.getOperand(1))->getAPIntValue();
9333 Mask
= Mask
.zext(VT
.getSizeInBits());
9335 SDValue And
= DAG
.getNode(N0
.getOpcode(), DL
, VT
,
9336 ExtLoad
, DAG
.getConstant(Mask
, DL
, VT
));
9337 ExtendSetCCUses(SetCCs
, N0
.getOperand(0), ExtLoad
, ISD::ZERO_EXTEND
);
9338 bool NoReplaceTruncAnd
= !N0
.hasOneUse();
9339 bool NoReplaceTrunc
= SDValue(LN00
, 0).hasOneUse();
9341 // If N0 has multiple uses, change other uses as well.
9342 if (NoReplaceTruncAnd
) {
9344 DAG
.getNode(ISD::TRUNCATE
, DL
, N0
.getValueType(), And
);
9345 CombineTo(N0
.getNode(), TruncAnd
);
9347 if (NoReplaceTrunc
) {
9348 DAG
.ReplaceAllUsesOfValueWith(SDValue(LN00
, 1), ExtLoad
.getValue(1));
9350 SDValue Trunc
= DAG
.getNode(ISD::TRUNCATE
, SDLoc(LN00
),
9351 LN00
->getValueType(0), ExtLoad
);
9352 CombineTo(LN00
, Trunc
, ExtLoad
.getValue(1));
9354 return SDValue(N
,0); // Return N so it doesn't get rechecked!
9359 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9360 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9361 if (SDValue ZExtLoad
= CombineZExtLogicopShiftLoad(N
))
9364 // Try to simplify (zext (zextload x)).
9365 if (SDValue foldedExt
= tryToFoldExtOfExtload(
9366 DAG
, *this, TLI
, VT
, LegalOperations
, N
, N0
, ISD::ZEXTLOAD
))
9369 if (SDValue V
= foldExtendedSignBitTest(N
, DAG
, LegalOperations
))
9372 if (N0
.getOpcode() == ISD::SETCC
) {
9373 // Only do this before legalize for now.
9374 if (!LegalOperations
&& VT
.isVector() &&
9375 N0
.getValueType().getVectorElementType() == MVT::i1
) {
9376 EVT N00VT
= N0
.getOperand(0).getValueType();
9377 if (getSetCCResultType(N00VT
) == N0
.getValueType())
9380 // We know that the # elements of the results is the same as the #
9381 // elements of the compare (and the # elements of the compare result for
9382 // that matter). Check to see that they are the same size. If so, we know
9383 // that the element size of the sext'd result matches the element size of
9384 // the compare operands.
9386 SDValue VecOnes
= DAG
.getConstant(1, DL
, VT
);
9387 if (VT
.getSizeInBits() == N00VT
.getSizeInBits()) {
9388 // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
9389 SDValue VSetCC
= DAG
.getNode(ISD::SETCC
, DL
, VT
, N0
.getOperand(0),
9390 N0
.getOperand(1), N0
.getOperand(2));
9391 return DAG
.getNode(ISD::AND
, DL
, VT
, VSetCC
, VecOnes
);
9394 // If the desired elements are smaller or larger than the source
9395 // elements we can use a matching integer vector type and then
9396 // truncate/sign extend.
9397 EVT MatchingVectorType
= N00VT
.changeVectorElementTypeToInteger();
9399 DAG
.getNode(ISD::SETCC
, DL
, MatchingVectorType
, N0
.getOperand(0),
9400 N0
.getOperand(1), N0
.getOperand(2));
9401 return DAG
.getNode(ISD::AND
, DL
, VT
, DAG
.getSExtOrTrunc(VsetCC
, DL
, VT
),
9405 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9407 if (SDValue SCC
= SimplifySelectCC(
9408 DL
, N0
.getOperand(0), N0
.getOperand(1), DAG
.getConstant(1, DL
, VT
),
9409 DAG
.getConstant(0, DL
, VT
),
9410 cast
<CondCodeSDNode
>(N0
.getOperand(2))->get(), true))
9414 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9415 if ((N0
.getOpcode() == ISD::SHL
|| N0
.getOpcode() == ISD::SRL
) &&
9416 isa
<ConstantSDNode
>(N0
.getOperand(1)) &&
9417 N0
.getOperand(0).getOpcode() == ISD::ZERO_EXTEND
&&
9419 SDValue ShAmt
= N0
.getOperand(1);
9420 unsigned ShAmtVal
= cast
<ConstantSDNode
>(ShAmt
)->getZExtValue();
9421 if (N0
.getOpcode() == ISD::SHL
) {
9422 SDValue InnerZExt
= N0
.getOperand(0);
9423 // If the original shl may be shifting out bits, do not perform this
9425 unsigned KnownZeroBits
= InnerZExt
.getValueSizeInBits() -
9426 InnerZExt
.getOperand(0).getValueSizeInBits();
9427 if (ShAmtVal
> KnownZeroBits
)
9433 // Ensure that the shift amount is wide enough for the shifted value.
9434 if (VT
.getSizeInBits() >= 256)
9435 ShAmt
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i32
, ShAmt
);
9437 return DAG
.getNode(N0
.getOpcode(), DL
, VT
,
9438 DAG
.getNode(ISD::ZERO_EXTEND
, DL
, VT
, N0
.getOperand(0)),
9442 if (SDValue NewVSel
= matchVSelectOpSizesWithSetCC(N
))
9448 SDValue
DAGCombiner::visitANY_EXTEND(SDNode
*N
) {
9449 SDValue N0
= N
->getOperand(0);
9450 EVT VT
= N
->getValueType(0);
9452 if (SDValue Res
= tryToFoldExtendOfConstant(N
, TLI
, DAG
, LegalTypes
))
9455 // fold (aext (aext x)) -> (aext x)
9456 // fold (aext (zext x)) -> (zext x)
9457 // fold (aext (sext x)) -> (sext x)
9458 if (N0
.getOpcode() == ISD::ANY_EXTEND
||
9459 N0
.getOpcode() == ISD::ZERO_EXTEND
||
9460 N0
.getOpcode() == ISD::SIGN_EXTEND
)
9461 return DAG
.getNode(N0
.getOpcode(), SDLoc(N
), VT
, N0
.getOperand(0));
9463 // fold (aext (truncate (load x))) -> (aext (smaller load x))
9464 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
9465 if (N0
.getOpcode() == ISD::TRUNCATE
) {
9466 if (SDValue NarrowLoad
= ReduceLoadWidth(N0
.getNode())) {
9467 SDNode
*oye
= N0
.getOperand(0).getNode();
9468 if (NarrowLoad
.getNode() != N0
.getNode()) {
9469 CombineTo(N0
.getNode(), NarrowLoad
);
9470 // CombineTo deleted the truncate, if needed, but not what's under it.
9473 return SDValue(N
, 0); // Return N so it doesn't get rechecked!
9477 // fold (aext (truncate x))
9478 if (N0
.getOpcode() == ISD::TRUNCATE
)
9479 return DAG
.getAnyExtOrTrunc(N0
.getOperand(0), SDLoc(N
), VT
);
9481 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9482 // if the trunc is not free.
9483 if (N0
.getOpcode() == ISD::AND
&&
9484 N0
.getOperand(0).getOpcode() == ISD::TRUNCATE
&&
9485 N0
.getOperand(1).getOpcode() == ISD::Constant
&&
9486 !TLI
.isTruncateFree(N0
.getOperand(0).getOperand(0).getValueType(),
9487 N0
.getValueType())) {
9489 SDValue X
= N0
.getOperand(0).getOperand(0);
9490 X
= DAG
.getAnyExtOrTrunc(X
, DL
, VT
);
9491 APInt Mask
= cast
<ConstantSDNode
>(N0
.getOperand(1))->getAPIntValue();
9492 Mask
= Mask
.zext(VT
.getSizeInBits());
9493 return DAG
.getNode(ISD::AND
, DL
, VT
,
9494 X
, DAG
.getConstant(Mask
, DL
, VT
));
9497 // fold (aext (load x)) -> (aext (truncate (extload x)))
9498 // None of the supported targets knows how to perform load and any_ext
9499 // on vectors in one instruction. We only perform this transformation on
9501 if (ISD::isNON_EXTLoad(N0
.getNode()) && !VT
.isVector() &&
9502 ISD::isUNINDEXEDLoad(N0
.getNode()) &&
9503 TLI
.isLoadExtLegal(ISD::EXTLOAD
, VT
, N0
.getValueType())) {
9504 bool DoXform
= true;
9505 SmallVector
<SDNode
*, 4> SetCCs
;
9506 if (!N0
.hasOneUse())
9507 DoXform
= ExtendUsesToFormExtLoad(VT
, N
, N0
, ISD::ANY_EXTEND
, SetCCs
,
9510 LoadSDNode
*LN0
= cast
<LoadSDNode
>(N0
);
9511 SDValue ExtLoad
= DAG
.getExtLoad(ISD::EXTLOAD
, SDLoc(N
), VT
,
9513 LN0
->getBasePtr(), N0
.getValueType(),
9514 LN0
->getMemOperand());
9515 ExtendSetCCUses(SetCCs
, N0
, ExtLoad
, ISD::ANY_EXTEND
);
9516 // If the load value is used only by N, replace it via CombineTo N.
9517 bool NoReplaceTrunc
= N0
.hasOneUse();
9518 CombineTo(N
, ExtLoad
);
9519 if (NoReplaceTrunc
) {
9520 DAG
.ReplaceAllUsesOfValueWith(SDValue(LN0
, 1), ExtLoad
.getValue(1));
9521 recursivelyDeleteUnusedNodes(LN0
);
9523 SDValue Trunc
= DAG
.getNode(ISD::TRUNCATE
, SDLoc(N0
),
9524 N0
.getValueType(), ExtLoad
);
9525 CombineTo(LN0
, Trunc
, ExtLoad
.getValue(1));
9527 return SDValue(N
, 0); // Return N so it doesn't get rechecked!
9531 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9532 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9533 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
9534 if (N0
.getOpcode() == ISD::LOAD
&& !ISD::isNON_EXTLoad(N0
.getNode()) &&
9535 ISD::isUNINDEXEDLoad(N0
.getNode()) && N0
.hasOneUse()) {
9536 LoadSDNode
*LN0
= cast
<LoadSDNode
>(N0
);
9537 ISD::LoadExtType ExtType
= LN0
->getExtensionType();
9538 EVT MemVT
= LN0
->getMemoryVT();
9539 if (!LegalOperations
|| TLI
.isLoadExtLegal(ExtType
, VT
, MemVT
)) {
9540 SDValue ExtLoad
= DAG
.getExtLoad(ExtType
, SDLoc(N
),
9541 VT
, LN0
->getChain(), LN0
->getBasePtr(),
9542 MemVT
, LN0
->getMemOperand());
9543 CombineTo(N
, ExtLoad
);
9544 DAG
.ReplaceAllUsesOfValueWith(SDValue(LN0
, 1), ExtLoad
.getValue(1));
9545 recursivelyDeleteUnusedNodes(LN0
);
9546 return SDValue(N
, 0); // Return N so it doesn't get rechecked!
  if (N0.getOpcode() == ISD::SETCC) {
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
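// Illustrative scalar example of the setcc fold above:
//   (aext (setcc x, y, setlt)) --> (select_cc x, y, 1, 0, setlt)
// The boolean is materialized directly in the wider result type instead of
// extending the narrow setcc result.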
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting on the smallest asserted type to the larger source type.
    // This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    SDLoc DL(N);
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X. Just move the AssertZext in front of the truncate and drop the
  // AssertSext.
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::AssertSext &&
      Opcode == ISD::AssertZext) {
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    if (AssertVT.bitsLT(BigA_AssertVT)) {
      SDLoc DL(N);
      SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                      BigA.getOperand(0), N1);
      return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
    }
  }

  return SDValue();
}
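// Illustrative example of the AssertZext-over-AssertSext case above
// (hypothetical widths):
//   (AssertZext (trunc (AssertSext X:i64, i16) to i32), i8)
//     --> (trunc (AssertZext X:i64, i8) to i32)
// Zero-extension from i8 implies the high bits are zero, which already
// satisfies the wider sign-extension assertion, so the AssertSext is dropped.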
/// If the result of a wider load is shifted to right of N bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. Also narrow the load if the result is masked with an AND
/// to effectively produce a smaller type. If the result is to be extended, also
/// fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!LN0 || !N01)
      return SDValue();
    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else {
      return SDValue();
    }

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes. If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
            cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);

  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into the lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in the
    // register.
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}
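// Worked example for ReduceLoadWidth above (illustrative, little-endian,
// hypothetical pointer p):
//   (i16 (trunc (srl (i32 (load p)), 16))) --> (i16 (load p+2))
// The shift amount (16) is a multiple of the narrow width, so the upper half
// of the original value can be loaded directly from the adjusted address.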
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    if ((N00Bits <= EVTBits ||
         (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
                         N0.getOperand(0));
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
        N0.hasOneUse()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
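// Illustrative example of the srl->sra fold above (i32 value, i8 ExtVT):
//   (sext_in_reg (srl X, 24), i8) --> (sra X, 24)
// The arithmetic shift already leaves the result sign-extended from bit 7,
// so the explicit in-register extension becomes redundant.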
SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Attempt to pre-truncate BUILD_VECTOR sources.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
    SDLoc DL(N);
    EVT SVT = VT.getScalarType();
    SmallVector<SDValue, 8> TruncOps;
    for (const SDValue &Op : N0->op_values()) {
      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
      TruncOps.push_back(TruncOp);
    }
    return DAG.getBuildVector(VT, DL, TruncOps);
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);
      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do for addcarry before legalize operation
      ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
       TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  // this transform.
  switch (N0.getOpcode()) {
  case ISD::ADD:
  case ISD::AND:
  case ISD::MUL:
  case ISD::OR:
  case ISD::SUB:
  case ISD::XOR:
    if (!LegalOperations && N0.hasOneUse() &&
        (isConstantOrConstantVector(N0.getOperand(0), true) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDLoc DL(N);
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
      }
    }
    break;
  }

  return SDValue();
}
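// Narrowing example for the binop fold above (illustrative):
//   (trunc (add i64:x, 42) to i32) --> (add (trunc i64:x to i32), 42)
// Truncation distributes over the add because the discarded high bits cannot
// influence the low 32 bits of the sum.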
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
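// Illustrative example (little-endian, hypothetical pointer p):
//   (i64 (build_pair (i32 (load p)), (i32 (load p+4)))) --> (i64 (load p))
// The low element sits at the lower address, so one aligned wide load
// reproduces both halves when the two loads are consecutive and non-volatile.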
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
  // and Lo parts; on big-endian machines it doesn't.
  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    return SDValue();

  // TODO: Handle cases where the integer constant is a different scalar
  // bitwidth to the FP.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
    return SDValue();

  unsigned FPOpcode;
  APInt SignMask;
  switch (N0.getOpcode()) {
  case ISD::AND:
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::XOR:
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::OR:
    FPOpcode = ISD::FABS;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  default:
    return SDValue();
  }

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
  //   fneg (fabs X)
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0.getOperand(0).getValueType() == VT) {
    SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
    NumFPLogicOpsConv++;
    if (N0.getOpcode() == ISD::OR)
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
    return FPOp;
  }

  return SDValue();
}
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // maybe depending on the bitcast.
  // First check to see if this is all constant.
  // TODO: Support FP bitcasts after legalize types.
  if (VT.isVector() &&
      (!LegalTypes ||
       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
        TLI.isTypeLegal(VT.getVectorElementType()))) &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      cast<BuildVectorSDNode>(N0)->isConstant())
    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                             VT.getVectorElementType());

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT))) {
      SDValue C = DAG.getBitcast(VT, N0);
      if (C.getNode() != N)
        return C;
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      // If the load is volatile, we only want to change the load type if the
      // resulting load is legal. Otherwise we might increase the number of
      // memory accesses. We don't care if the original type was legal or not
      // as we assume software couldn't rely on the number of accesses of an
      // illegal type.
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  //   fold (bitcast (fcopysign cst, x)) ->
  //       flipbit = (and (extract_element
  //                      (xor (bitcast cst), (bitcast x)), 0),
  //                  signbit)
  //       (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!SV0 || !SV1)
      return SDValue();

    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
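// Mask-scaling example for the shuffle fold above (illustrative):
//   (v4i32 bitcast (shuffle<1,0> (v2i64 bitcast v4i32:a), (v2i64 bitcast v4i32:b)))
//     --> shuffle<2,3,0,1> v4i32:a, v4i32:b
// Each wide-element mask index M expands to the MaskScale narrow indices
// M*MaskScale .. M*MaskScale+MaskScale-1.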
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element. This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated. Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements. To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector. If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
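// Shrinking example for the constant folding above (illustrative,
// little-endian): bitcasting (build_vector i32 0x11223344, i32 0x55667788)
// to v4i16 produces (build_vector i16 0x3344, i16 0x1122, i16 0x7788, i16 0x5566).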
static bool isContractable(SDNode *N) {
  SDNodeFlags F = N->getFlags();
  return F.hasAllowContract() || F.hasAllowReassociation();
}
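// Basic contraction example for the FADD/FSUB combines below (illustrative):
//   (fadd (fmul x, y), z) --> (fma x, y, z)
// This is only done when contraction is allowed (fast-math contract/reassoc
// flags, global fusion, or a target FMAD), since fusing removes the
// intermediate rounding of the separate multiply.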
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    if (CanFuse &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1, Flags), Flags);
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    if (CanFuse &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0, Flags), Flags);
    }

    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&] (
        SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
        SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
        SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
        SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  return SDValue();
}
/// Try to perform FMA combining on a given FSUB node.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
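
  // The folds below rewrite an FSUB that feeds on a contractable FMUL
  // (directly, or behind FNEG/FP_EXTEND) into a single fused multiply-add,
  // inserting FNEG nodes where needed so the value is preserved; e.g. with
  // FMA as the fused opcode, (fsub (fmul a, b), c) becomes (fma a, b, (fneg c)).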
  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0, Flags);
  }

  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                 N10.getOperand(0))),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)),
                         N0, Flags);
    }
  }

  // fold (fsub (fpext (fneg (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x)), (fpext y), z)
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y (fma u, v, (fneg z)))
    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1), Flags), Flags);
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0, Flags), Flags);
    }

    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             N0.getOperand(0), N0.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(1)),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1200)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1201),
                                       N0, Flags), Flags);
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N100)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1020)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1021),
                                       N0, Flags), Flags);
      }
    }
  }

  return SDValue();
}
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);
  const SDNodeFlags Flags = N->getFlags();

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
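
  // For example, with N0 = (fadd x0, 1.0) this rewrites N0 * N1 as
  // x0 * N1 + N1, i.e. a single (fma x0, N1, N1); the FuseFADD/FuseFSUB
  // helpers below enumerate the +/-1.0 and commuted variants.
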
  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
    }
    return SDValue();
  };

  if (SDValue FMA = FuseFADD(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0, Flags))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y, Flags);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
      }
    }
    return SDValue();
  };

  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
    return FMA;

  return SDValue();
}
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  auto isFMulNegTwo = [](SDValue FMul) {
    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
      return false;
    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
    return C && C->isExactlyValue(-2.0);
  };
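
  // The two folds below trade the -2.0 constant and the multiply for an add
  // of B with itself, letting the sign flip fold into the surrounding FSUB.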
  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N0)) {
    SDValue B = N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
  }
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N1)) {
    SDValue B = N1.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If 'unsafe math' or nnan is enabled, fold lots of things.
  if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if ((Options.UnsafeFPMath ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    if (Options.UnsafeFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub -0.0, N1) -> -N1
  // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
  //       FSUB does not specify the sign bit of a NaN. Also note that for
  //       the same reason, the inverse transform is not safe, unless fast math
  //       flags are in play.
  if (N0CFP && N0CFP->isZero()) {
    if (N0CFP->isNegative() ||
        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
        return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }
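
  // The X - (X + Y) folds below are reassociations: they drop the rounding of
  // the inner FADD and can change the sign of a zero result, which is why they
  // sit behind the reassoc+nsz (or global unsafe-math) check.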
  if ((Options.UnsafeFPMath ||
      (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
      && N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (isConstantFPBuildVectorOrConstantFP(N01) &&
          !isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
                                       ForCodeSize)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
                                         ForCodeSize)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations,
                                                ForCodeSize),
                           GetNegatedExpression(N1, DAG, LegalOperations,
                                                ForCodeSize),
                           Flags);
    }
  }
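
  // The select-based folds below recognize the idiom of multiplying x by a
  // select of +/-1.0 keyed on comparing x against zero, and replace the
  // compare + select + multiply with a single fabs (or fneg of fabs).
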
  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                             DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }
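
  // The N1CFP folds below do not need unsafe math: multiplying by exactly
  // +1.0 or -1.0 is exact, so the FMA collapses to a plain FADD (with an
  // FNEG for the -1.0 case).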
  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x -K, y
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
                                              ForCodeSize)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  //       least 1 extra instruction. But the perf win may be substantial enough
  //       that only minsize should restrict this.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorNumElements();

  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }
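
  // Note that Users includes N itself here: N is an FDIV by N1 and already
  // passed the same reciprocal/unsafe-math check at the top of this function.
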
  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = combineRepeatedFPDivisors(N))
    return V;

  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
                                       ForCodeSize)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
                                         ForCodeSize)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations,
                                                ForCodeSize),
                           GetNegatedExpression(N1, DAG, LegalOperations,
                                                ForCodeSize),
                           Flags);
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  return SDValue();
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  SDNodeFlags Flags = N->getFlags();
  if (!DAG.getTarget().Options.UnsafeFPMath &&
      !Flags.hasApproximateFuncs())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  if (TLI.isFsqrtCheap(N0, DAG))
    return SDValue();

  // FSQRT nodes have flags that propagate to the created nodes.
  return buildSqrtEstimate(N0, Flags);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
  SDValue N1 = N->getOperand(1);
  if ((N1.getOpcode() == ISD::FP_EXTEND ||
       N1.getOpcode() == ISD::FP_ROUND)) {
    // Do not optimize out type conversion of f128 type yet.
    // For some targets like x86_64, configuration is changed to keep one f128
    // value in one SSE register, but instruction selection cannot handle
    // FCOPYSIGN on SSE registers yet.
    EVT N1VT = N1->getValueType(0);
    EVT N1Op0VT = N1->getOperand(0).getValueType();
    return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
  }
  return false;
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
    const APFloat &V = N1C->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));

  return SDValue();
}
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
  }
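
  // (The guards above require a cbrt() libcall to exist and refuse to replace
  // an FPOW that the target can lower with an FCBRT that it would have to
  // expand.)
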
  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
  if (ExponentIs025 || ExponentIs075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();

    // We only need no signed zeros for the 0.25 case.
    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (DAG.getMachineFunction().getFunction().hasOptSize())
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
    if (ExponentIs025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
  }

  return SDValue();
}
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
                               const TargetLowering &TLI) {
  // This optimization is guarded by a function attribute because it may produce
  // unexpected results. Ie, programs may be relying on the platform-specific
  // undefined behavior when the float-to-int conversion overflows.
  const Function &F = DAG.getMachineFunction().getFunction();
  Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
  if (StrictOverflow.getValueAsString().equals("false"))
    return SDValue();

  // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
  // replacing casts with a libcall. We also must be allowed to ignore -0.0
  // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
  // conversions would return +0.0.
  // FIXME: We should be able to use node-level FMF here.
  // TODO: If strict math, should we use FABS (+ range check for signed cast)?
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
      !DAG.getTarget().Options.NoSignedZerosFPMath)
    return SDValue();

  // fptosi/fptoui round towards zero, so converting from FP to integer and
  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
  SDValue N0 = N->getOperand(0);
  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
      N0.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));

  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
      N0.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));

  return SDValue();
}
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
      hasOperation(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }
  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
      hasOperation(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0,, cc)
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
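
  // Worked example: fp_to_sint(sint_to_fp(i16 x)) through f32. A signed i16
  // contributes at most 15 value bits and f32 carries a 24-bit significand,
  // so every input is represented exactly and the round trip can be removed.
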
  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    return DAG.getBitcast(VT, Src);
  }

  return SDValue();
}
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64. Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDLoc DL(N);
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND &&
      N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
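  // That is, an f32 load whose value only feeds this fp_extend becomes a
  // single extending f64 load; the f32-typed result is rebuilt below with an
  // fp_round of the extload, and the extload's chain replaces the old load's
  // chain for any chain users.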
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  // fold ftrunc (known rounded int x) -> x
  // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
  // likely to be generated to extract integer from a rounded floating value.
  switch (N0.getOpcode()) {
  default: break;
  case ISD::FRINT:
  case ISD::FTRUNC:
  case ISD::FNEARBYINT:
  case ISD::FFLOOR:
  case ISD::FCEIL:
    return N0;
  }

  return SDValue();
}

SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}

// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options, ForCodeSize))
    return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}

static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
                            APFloat (*Op)(const APFloat &, const APFloat &)) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
  }

  // Canonicalize to constant on RHS.
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  return visitFMinMax(DAG, N, minnum);
}

SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  return visitFMinMax(DAG, N, maxnum);
}

SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, minimum);
}

SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, maximum);
}

SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
  if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(N->getValueType(0), Int);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if (N1.hasOneUse()) {
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
  }

  return SDValue();
}

SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br(x == y)
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
    HandleSDNode XORHandle(N);
    while (N.getOpcode() == ISD::XOR) {
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
        N = Tmp;
    }

    if (N.getOpcode() != ISD::XOR)
      return N;

    SDNode *TheXor = N.getNode();

    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      // Replace the uses of XOR with SETCC
      return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
                          Equal ? ISD::SETEQ : ISD::SETNE);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}

/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
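/// For example, if N is (add %base, 16) and Use is a load whose base pointer
/// is N, this returns true whenever [reg + imm] addressing with offset 16 is
/// legal for the loaded type on this target.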
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  EVT VT;
  unsigned AS;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else
    return false;

  TargetLowering::AddrMode AM;
  if (N->getOpcode() == ISD::ADD) {
    AM.HasBaseReg = true;
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    AM.HasBaseReg = true;
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else
    return false;

  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
}

/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
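/// For example, with (store %v, (add %p, 4)) where (add %p, 4) has further
/// uses, the store becomes a pre-indexed store that writes through %p + 4 and
/// also produces the incremented pointer; the other users of the add are then
/// rewritten to use that produced pointer value.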
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1. Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
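    //
    // Concretely: if the other use is t0 = (add ptr0, 8) and the new node is
    // a pre-decrement access with offset1 = 4 (so t1 = ptr0 - 4), then
    // x0 = 1, y0 = 1, x1 = -1, y1 = 1, and t0 is rebuilt as (add t1, 12),
    // since 8 - (-1 * 4) = 12.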

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}

/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all other uses of the
/// add/subtract are redirected to the new load/store.
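/// For example, (load %p) followed elsewhere by (add %p, 4) can become a
/// post-indexed load that yields both the loaded value and %p + 4, after
/// which the separate add is deleted.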
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  if (Ptr.getNode()->hasOneUse())
    return false;

  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 8> Worklist;
      // Ptr is predecessor to both N and Op.
      Visited.insert(Ptr.getNode());
      Worklist.push_back(N);
      Worklist.push_back(Op);
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Op with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

static inline int numVectorEltsOrZero(EVT T) {
  return T.isVector() ? T.getVectorNumElements() : 0;
}

bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
  Val = ST->getValue();
  EVT STType = Val.getValueType();
  EVT STMemType = ST->getMemoryVT();
  if (STType == STMemType)
    return true;
  if (isTypeLegal(STMemType))
    return false; // fail.
  if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
      TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
    Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
    return true;
  }
  if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
      STType.isInteger() && STMemType.isInteger()) {
    Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
    return true;
  }
  if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
    Val = DAG.getBitcast(STMemType, Val);
    return true;
  }
  return false; // fail.
}

bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
  EVT LDMemType = LD->getMemoryVT();
  EVT LDType = LD->getValueType(0);
  assert(Val.getValueType() == LDMemType &&
         "Attempting to extend value of non-matching type");
  if (LDType == LDMemType)
    return true;
  if (LDMemType.isInteger() && LDType.isInteger()) {
    switch (LD->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      Val = DAG.getBitcast(LDType, Val);
      return true;
    case ISD::EXTLOAD:
      Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    case ISD::SEXTLOAD:
      Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    case ISD::ZEXTLOAD:
      Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    }
  }
  return false;
}

SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || LD->isVolatile())
    return SDValue();
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  if (!ST || ST->isVolatile())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
  int64_t Offset;
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
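  // For example, for an i32 store and an i16 load of the same address on a
  // big-endian target, the load reads the two most significant bytes, so the
  // normalized Offset becomes (32 - 16) / 8 = 2.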
  if (DAG.getDataLayout().isBigEndian())
    Offset = (STMemType.getStoreSizeInBits() -
              LDMemType.getStoreSizeInBits()) / 8 - Offset;

  // Check that the stored value covers all bits that are loaded.
  bool STCoversLD =
      (Offset >= 0) &&
      (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());

  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
                    LD->getAddressingMode() == ISD::POST_DEC);
      unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
      SDValue Idx =
          DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
                      LD->getOperand(1), LD->getOperand(2));
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
                                               STMemType.getSizeInBits()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extensions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}

SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        //
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid the dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {

/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
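/// For example, on a little-endian target, if Origin loads an i64 and Inst is
/// (trunc (srl Origin, 32) to i32), the slice is an i32 load from Base + 4
/// and Inst's users are redirected to that narrower load.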
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;

    /// Various cost.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows growing the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};

} // end anonymous namespace

/// Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}

/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}

/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
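/// In practice, an i64 load split into two i32 slices costs two loads against
/// the original single load, so it is typically only deemed profitable when
/// the target can pair the two narrow loads (see adjustCostForPairing) or
/// when a slice removes an expensive cross-register-bank copy.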
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  if (NumberOfSlices != 2)
    return false;

  if (!areUsedBitsDense(UsedBits))
    return false;

  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
  // The original code has one big load.
  OrigCost.Loads = 1;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
    // Accumulate the cost of all the slices.
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
    GlobalSlicingCost += SliceCost;

    // Account as cost in the original configuration the gain obtained
    // with the current slices.
    OrigCost.addSliceGain(LS);
  }

  // If the target supports paired load, adjust the cost accordingly.
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
  return OrigCost > GlobalSlicingCost;
}

/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
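/// For example, a load of an i64 whose only users are (trunc ... to i32) and
/// (trunc (lshr ..., 32) to i32) can become two independent i32 loads of the
/// low and high halves.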
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a TRUNCATE iff we encountered trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}

/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out. If so, return the byte size being masked out
/// and the shift amount.
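/// For example, V = (and (load i32 %p), 0xFFFF00FF) masks out the byte at
/// byte offset 1 of the loaded word, so the result is {1 byte, shift 1},
/// provided the chain and pointer checks below succeed.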
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask. Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits. If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64 - V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits() - NotMaskLZ - NotMaskTZ) / 8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result;  // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ / 8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk)
      return Result;
  } else
    return Result; // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ / 8;
  return Result;
}
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
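///
/// Roughly, when IVal is known to be zero outside the byte range described by
/// MaskInfo, a pattern such as (store (or (and (load p), Mask), IVal), p) can
/// be replaced by storing (trunc (srl IVal, ByteShift*8)) at p+StOffset using
/// a narrower type, assuming that narrower type is legal on the target.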
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this. If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this. It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}

/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
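///
/// For example, on a little-endian target where i8 operations are legal,
/// (store (or (load p), 0x00FF0000), p) only touches byte 2 of the value, so
/// it can be narrowed to an i8 load / or / store at p+2 with the constant 0xFF.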
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes. If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                 Value.getOperand(1), ST, this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                 Value.getOperand(0), ST, this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb that changed does not start at a type-bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      return NewST;
    }
  }

  return SDValue();
}

/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
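///
/// For example, an f32 value that is loaded and then immediately stored, with
/// no other uses, may become an i32 load followed by an i32 store when the
/// target reports both integer operations as legal and desirable.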
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
      Value.hasOneUse() &&
      Chain == SDValue(Value.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(Value);
    EVT VT = LD->getMemoryVT();
    if (!VT.isFloatingPoint() ||
        VT != ST->getMemoryVT() ||
        LD->isNonTemporal() ||
        ST->isNonTemporal() ||
        LD->getPointerInfo().getAddrSpace() != 0 ||
        ST->getPointerInfo().getAddrSpace() != 0)
      return SDValue();

    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
      return SDValue();

    unsigned LDAlign = LD->getAlignment();
    unsigned STAlign = ST->getAlignment();
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
      return SDValue();

    SDValue NewLD =
        DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
                    LD->getPointerInfo(), LDAlign);

    SDValue NewST =
        DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
                     ST->getPointerInfo(), STAlign);

    AddToWorklist(NewLD.getNode());
    AddToWorklist(NewST.getNode());
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
    return NewST;
  }

  return SDValue();
}

// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
// and ConstNode is c2.
//
// If the (add x, c1) has multiple uses, we could increase
// the number of adds if we make this transformation.
// It would only be worth doing this if we can remove a
// multiply in the process. Check for that here.
//
// We're checking for cases where we have common "c3 * A" expressions.
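//
// For example, with AddNode = (add x, 8) and ConstNode = 5, the fold
//   (mul (add x, 8), 5) -> (add (mul x, 5), 40)
// is only considered profitable when (mul x, 5), or an add that will expand
// into it, already appears among the other uses of the constant 5.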
bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
                                              SDValue &AddNode,
                                              SDValue &ConstNode) {
  // If the add only has one use, this would be OK to do.
  if (AddNode.getNode()->hasOneUse())
    return true;

  // Walk all the users of the constant with which we're multiplying.
  for (SDNode *Use : ConstNode->uses()) {
    if (Use == MulNode) // This use is the one we're on right now. Skip it.
      continue;

    if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
      SDNode *OtherOp;
      SDNode *MulVar = AddNode.getOperand(0).getNode();

      // OtherOp is what we're multiplying against the constant.
      if (Use->getOperand(0) == ConstNode)
        OtherOp = Use->getOperand(1).getNode();
      else
        OtherOp = Use->getOperand(0).getNode();

      // Check to see if multiply is with the same operand of our "add".
      //
      //     ConstNode  = CONST
      //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
      //     ...
      //     AddNode  = (A + c1)  <-- MulVar is A.
      //         = AddNode * ConstNode   <-- current visiting instruction.
      //
      // If we make this transformation, we will have a common
      // multiply (ConstNode * A) that we can save.
      if (OtherOp == MulVar)
        return true;

      // Now check to see if a future expansion will give us a common
      // multiply.
      //
      //     ConstNode  = CONST
      //     AddNode    = (A + c1)
      //     ...        = AddNode * ConstNode <-- current visiting instruction.
      //     ...
      //     OtherOp    = (A + c2)
      //     Use        = OtherOp * ConstNode <-- visiting Use.
      //
      // If we make this transformation, we will have a common
      // multiply (CONST * A) after we also do the same transformation
      // to the "Use" instruction.
      if (OtherOp->getOpcode() == ISD::ADD &&
          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
          OtherOp->getOperand(0).getNode() == MulVar)
        return true;
    }
  }

  // Didn't find a case where this would be profitable.
  return false;
}

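/// Build the chain for a merged store: gather the distinct incoming chains of
/// the first NumStores stores (ignoring chains that are themselves members of
/// the merged group) and tie them together with a TokenFactor.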
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         unsigned NumStores) {
  SmallVector<SDValue, 8> Chains;
  SmallPtrSet<const SDNode *, 8> Visited;
  SDLoc StoreDL(StoreNodes[0].MemNode);

  for (unsigned i = 0; i < NumStores; ++i) {
    Visited.insert(StoreNodes[i].MemNode);
  }

  // Don't include nodes that are children or repeated nodes.
  for (unsigned i = 0; i < NumStores; ++i) {
    if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
      Chains.push_back(StoreNodes[i].MemNode->getChain());
  }

  assert(Chains.size() > 0 && "Chain should have generated a chain");
  return DAG.getTokenFactor(StoreDL, Chains);
}

bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Make sure the correctly sized value has the correct type.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
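    // For example, merging four i8 stores of 0x11, 0x22, 0x33 and 0x44 (in
    // increasing address order) on a little-endian target should yield the
    // single i32 constant 0x44332211, which writes the same bytes to memory.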
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // Make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}

void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcasts(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile.
    if (Ld->isVolatile() || Ld->isIndexed())
      return;
  }
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match.
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile.
        if (OtherLd->isVolatile() || OtherLd->isIndexed())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load,
  // which is always true for non-volatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  //            Root
  //    |-------|-------|
  //  Load    Load    Store3
  //    |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  unsigned NumNodesExplored = 0;
  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}

// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
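// For example, if one candidate's stored value is a load whose chain reaches
// another candidate store, then replacing both stores with a single merged
// store would make that store a predecessor of itself through the load.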
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come from in a similar way to
  // TokenFactor simplification.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards size check.

  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0)   -> We have already considered these
    //                       in candidate selection and can be
    //                       safely ignored.
    //   * Value (Op 1)   -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles possible (e.g. via indexed store).
    //   * (Op 3)         -> Represents the pre or post-indexing offset (or
    //                       undef for non-indexed stores). Not constant on all
    //                       targets (e.g. ARM) and so can participate in a
    //                       cycle.
    for (unsigned j = 1; j < N->getNumOperands(); ++j)
      Worklist.push_back(N->getOperand(j).getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max))
      return false;
  return true;
}

bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
    return false;

  bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
      Attribute::NoImplicitFloat);

  // This function cannot currently deal with non-byte-sized memory sizes.
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
    return false;

  if (!MemVT.isSimple())
    return false;

  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants, loads, or extracted vector elements.
  SDValue StoredVal = peekThroughBitcasts(St->getValue());
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                       isa<ConstantFPSDNode>(StoredVal);
  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);

  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
    return false;

  SmallVector<MemOpLink, 8> StoreNodes;
  SDNode *RootNode;
  // Find potential store merge candidates by searching through chain sub-DAG.
  getStoreMergeCandidates(St, StoreNodes, RootNode);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.
  llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase;
  });

  // Store Merge attempts to merge the lowest stores. This generally
  // works out as if successful, as the remaining stores are checked
  // after the first collection of stores is merged. However, in the
  // case that a non-mergeable store is found first, e.g., {p[-2],
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
  // mergeable cases. To prevent this, we prune such stores from the
  // front of StoreNodes here.

  bool RV = false;
  while (StoreNodes.size() > 1) {
    unsigned StartIdx = 0;
    while ((StartIdx + 1 < StoreNodes.size()) &&
           StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
               StoreNodes[StartIdx + 1].OffsetFromBase)
      ++StartIdx;

    // Bail if we don't have enough candidates to merge.
    if (StartIdx + 1 >= StoreNodes.size())
      return RV;

    if (StartIdx)
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);

    // Scan the memory operations on the chain and find the first
    // non-consecutive store memory address.
    unsigned NumConsecutiveStores = 1;
    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
    // Check that the addresses are consecutive starting from the second
    // element in the list of stores.
    for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      NumConsecutiveStores = i + 1;
    }

    if (NumConsecutiveStores < 2) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumConsecutiveStores);
      continue;
    }

    // The node with the lowest store address.
    LLVMContext &Context = *DAG.getContext();
    const DataLayout &DL = DAG.getDataLayout();

    // Store the constants into memory as one consecutive store.
    if (IsConstantSrc) {
      while (NumConsecutiveStores >= 2) {
        LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
        unsigned FirstStoreAS = FirstInChain->getAddressSpace();
        unsigned FirstStoreAlign = FirstInChain->getAlignment();
        unsigned LastLegalType = 1;
        unsigned LastLegalVectorType = 1;
        bool LastIntegerTrunc = false;
        bool NonZero = false;
        unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
        for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
          StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
          SDValue StoredVal = ST->getValue();
          bool IsElementZero = false;
          if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
            IsElementZero = C->isNullValue();
          else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
            IsElementZero = C->getConstantFPValue()->isNullValue();
          if (IsElementZero) {
            if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
              FirstZeroAfterNonZero = i;
          }
          NonZero |= !IsElementZero;

          // Find a legal type for the constant store.
          unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
          EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
          bool IsFast = false;

          // Break early when size is too large to be legal.
          if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
            break;

          if (TLI.isTypeLegal(StoreTy) &&
              TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast) {
            LastIntegerTrunc = false;
            LastLegalType = i + 1;
            // Or check whether a truncstore is legal.
          } else if (TLI.getTypeAction(Context, StoreTy) ==
                     TargetLowering::TypePromoteInteger) {
            EVT LegalizedStoredValTy =
                TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
            if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
                TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
                TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                       FirstStoreAlign, &IsFast) &&
                IsFast) {
              LastIntegerTrunc = true;
              LastLegalType = i + 1;
            }
          }

          // We only use vectors if the constant is known to be zero or the
          // target allows it and the function is not marked with the
          // noimplicitfloat attribute.
          if ((!NonZero ||
               TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
              !NoVectors) {
            // Find a legal type for the vector store.
            unsigned Elts = (i + 1) * NumMemElts;
            EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
            if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
                TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
                TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                       FirstStoreAlign, &IsFast) &&
                IsFast)
              LastLegalVectorType = i + 1;
          }
        }

        bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
        unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;

        // Check if we found a legal integer type that creates a meaningful
        // merge.
        if (NumElem < 2) {
          // We know that candidate stores are in order and of correct
          // shape. While there is no mergeable sequence from the
          // beginning one may start later in the sequence. The only
          // reason a merge of size N could have failed where another of
          // the same size would not have, is if the alignment has
          // improved or we've dropped a non-zero value. Drop as many
          // candidates as we can here.
          unsigned NumSkip = 1;
          while (
              (NumSkip < NumConsecutiveStores) &&
              (NumSkip < FirstZeroAfterNonZero) &&
              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
            NumSkip++;

          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
          NumConsecutiveStores -= NumSkip;
          continue;
        }

        // Check that we can merge these candidates without causing a cycle.
        if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                      RootNode)) {
          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
          NumConsecutiveStores -= NumElem;
          continue;
        }

        RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
                                              UseVector, LastIntegerTrunc);

        // Remove merged stores for next iteration.
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
        NumConsecutiveStores -= NumElem;
      }
      continue;
    }

    // When extracting multiple vector elements, try to store them
    // in one vector store rather than a sequence of scalar stores.
    if (IsExtractVecSrc) {
      // Loop on Consecutive Stores on success.
      while (NumConsecutiveStores >= 2) {
        LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
        unsigned FirstStoreAS = FirstInChain->getAddressSpace();
        unsigned FirstStoreAlign = FirstInChain->getAlignment();
        unsigned NumStoresToMerge = 1;
        for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
          // Find a legal type for the vector store.
          unsigned Elts = (i + 1) * NumMemElts;
          EVT Ty =
              EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
          bool IsFast;

          // Break early when size is too large to be legal.
          if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
            break;

          if (TLI.isTypeLegal(Ty) &&
              TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast)
            NumStoresToMerge = i + 1;
        }

        // Check if we found a legal integer type creating a meaningful
        // merge.
        if (NumStoresToMerge < 2) {
          // We know that candidate stores are in order and of correct
          // shape. While there is no mergeable sequence from the
          // beginning one may start later in the sequence. The only
          // reason a merge of size N could have failed where another of
          // the same size would not have, is if the alignment has
          // improved. Drop as many candidates as we can here.
          unsigned NumSkip = 1;
          while (
              (NumSkip < NumConsecutiveStores) &&
              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
            NumSkip++;

          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
          NumConsecutiveStores -= NumSkip;
          continue;
        }

        // Check that we can merge these candidates without causing a cycle.
        if (!checkMergeStoreCandidatesForDependencies(
                StoreNodes, NumStoresToMerge, RootNode)) {
          StoreNodes.erase(StoreNodes.begin(),
                           StoreNodes.begin() + NumStoresToMerge);
          NumConsecutiveStores -= NumStoresToMerge;
          continue;
        }

        RV |= MergeStoresOfConstantsOrVecElts(
            StoreNodes, MemVT, NumStoresToMerge, false, true, false);

        StoreNodes.erase(StoreNodes.begin(),
                         StoreNodes.begin() + NumStoresToMerge);
        NumConsecutiveStores -= NumStoresToMerge;
      }
      continue;
    }

    // Below we handle the case of multiple consecutive stores that
    // come from multiple consecutive loads. We merge them into a single
    // wide load and a single wide store.

    // Look for load nodes which are used by the stored values.
    SmallVector<MemOpLink, 8> LoadNodes;

    // Find acceptable loads. Loads need to have the same chain (token factor),
    // must not be zext, volatile, indexed, and they must be consecutive.
    BaseIndexOffset LdBasePtr;

    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue Val = peekThroughBitcasts(St->getValue());
      LoadSDNode *Ld = cast<LoadSDNode>(Val);

      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
      // If this is not the first ptr that we check.
      int64_t LdOffset = 0;
      if (LdBasePtr.getBase().getNode()) {
        // The base ptr must be the same.
        if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
          break;
      } else {
        // Check that all other base pointers are the same as this one.
        LdBasePtr = LdPtr;
      }

      // We found a potential memory operand to merge.
      LoadNodes.push_back(MemOpLink(Ld, LdOffset));
    }

    while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
      // If we have load/store pair instructions and we only have two values,
      // don't bother merging.
      unsigned RequiredAlignment;
      if (LoadNodes.size() == 2 &&
          TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
          StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
        break;
      }
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
      LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
      unsigned FirstLoadAS = FirstLoad->getAddressSpace();
      unsigned FirstLoadAlign = FirstLoad->getAlignment();

      // Scan the memory operations on the chain and find the first
      // non-consecutive load memory address. These variables hold the index in
      // the store node array.

      unsigned LastConsecutiveLoad = 1;

      // This variable refers to the size and not index in the array.
      unsigned LastLegalVectorType = 1;
      unsigned LastLegalIntegerType = 1;
      bool isDereferenceable = true;
      bool DoIntegerTruncate = false;
      StartAddress = LoadNodes[0].OffsetFromBase;
      SDValue FirstChain = FirstLoad->getChain();
      for (unsigned i = 1; i < LoadNodes.size(); ++i) {
        // All loads must share the same chain.
        if (LoadNodes[i].MemNode->getChain() != FirstChain)
          break;

        int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
        if (CurrAddress - StartAddress != (ElementSizeBytes * i))
          break;
        LastConsecutiveLoad = i;

        if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
          isDereferenceable = false;

        // Find a legal type for the vector store.
        unsigned Elts = (i + 1) * NumMemElts;
        EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);

        // Break early when size is too large to be legal.
        if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
          break;

        bool IsFastSt, IsFastLd;
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                   FirstLoadAlign, &IsFastLd) &&
            IsFastLd) {
          LastLegalVectorType = i + 1;
        }

        // Find a legal type for the integer store.
        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
        StoreTy = EVT::getIntegerVT(Context, SizeInBits);
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                   FirstLoadAlign, &IsFastLd) &&
            IsFastLd) {
          LastLegalIntegerType = i + 1;
          DoIntegerTruncate = false;
          // Or check whether a truncstore and extload is legal.
        } else if (TLI.getTypeAction(Context, StoreTy) ==
                   TargetLowering::TypePromoteInteger) {
          EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
                                 StoreTy) &&
              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
                                 StoreTy) &&
              TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                     FirstStoreAlign, &IsFastSt) &&
              IsFastSt &&
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                     FirstLoadAlign, &IsFastLd) &&
              IsFastLd) {
            LastLegalIntegerType = i + 1;
            DoIntegerTruncate = true;
          }
        }
      }

      // Only use vector types if the vector type is larger than the integer
      // type. If they are the same, use integers.
      bool UseVectorTy =
          LastLegalVectorType > LastLegalIntegerType && !NoVectors;
      unsigned LastLegalType =
          std::max(LastLegalVectorType, LastLegalIntegerType);

      // We add +1 here because the LastXXX variables refer to location while
      // the NumElem refers to array/index size.
      unsigned NumElem =
          std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
      NumElem = std::min(LastLegalType, NumElem);

      if (NumElem < 2) {
        // We know that candidate stores are in order and of correct
        // shape. While there is no mergeable sequence from the
        // beginning one may start later in the sequence. The only
        // reason a merge of size N could have failed where another of
        // the same size would not have is if the alignment of either
        // the load or store has improved. Drop as many candidates as we
        // can here.
        unsigned NumSkip = 1;
        while ((NumSkip < LoadNodes.size()) &&
               (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
          NumSkip++;
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
        NumConsecutiveStores -= NumSkip;
        continue;
      }

      // Check that we can merge these candidates without causing a cycle.
      if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                    RootNode)) {
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
        NumConsecutiveStores -= NumElem;
        continue;
      }

      // Find if it is better to use vectors or integers to load and store
      // to memory.
      EVT JointMemOpVT;
      if (UseVectorTy) {
        // Find a legal type for the vector store.
        unsigned Elts = NumElem * NumMemElts;
        JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
      } else {
        unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
        JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
      }

      SDLoc LoadDL(LoadNodes[0].MemNode);
      SDLoc StoreDL(StoreNodes[0].MemNode);

      // The merged loads are required to have the same incoming chain, so
      // using the first's chain is acceptable.

      SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
      AddToWorklist(NewStoreChain.getNode());

      MachineMemOperand::Flags MMOFlags =
          isDereferenceable ? MachineMemOperand::MODereferenceable
                            : MachineMemOperand::MONone;

      SDValue NewLoad, NewStore;
      if (UseVectorTy || !DoIntegerTruncate) {
        NewLoad =
            DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
                        FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
                        FirstLoadAlign, MMOFlags);
        NewStore = DAG.getStore(
            NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
            FirstInChain->getPointerInfo(), FirstStoreAlign);
      } else { // This must be the truncstore/extload case
        EVT ExtendedTy =
            TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
        NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
                                 FirstLoad->getChain(), FirstLoad->getBasePtr(),
                                 FirstLoad->getPointerInfo(), JointMemOpVT,
                                 FirstLoadAlign, MMOFlags);
        NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
                                     FirstInChain->getBasePtr(),
                                     FirstInChain->getPointerInfo(),
                                     JointMemOpVT, FirstInChain->getAlignment(),
                                     FirstInChain->getMemOperand()->getFlags());
      }

      // Transfer chain users from old loads to the new load.
      for (unsigned i = 0; i < NumElem; ++i) {
        LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                      SDValue(NewLoad.getNode(), 1));
      }

      // Replace all the stores with the new store. Recursively remove the
      // corresponding value if it's no longer used.
      for (unsigned i = 0; i < NumElem; ++i) {
        SDValue Val = StoreNodes[i].MemNode->getOperand(1);
        CombineTo(StoreNodes[i].MemNode, NewStore);
        if (Val.getNode()->use_empty())
          recursivelyDeleteUnusedNodes(Val.getNode());
      }

      RV = true;
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
      NumConsecutiveStores -= NumElem;
    }
  }
  return RV;
}

SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
  SDLoc SL(ST);
  SDValue ReplStore;

  // Replace the chain to avoid dependency.
  if (ST->isTruncatingStore()) {
    ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
                                  ST->getBasePtr(), ST->getMemoryVT(),
                                  ST->getMemOperand());
  } else {
    ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
                             ST->getMemOperand());
  }

  // Create token to keep both nodes around.
  SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
                              MVT::Other, ST->getChain(), ReplStore);

  // Make sure the new and old chains are cleaned up.
  AddToWorklist(Token.getNode());

  // Don't add users to work list.
  return CombineTo(ST, Token, false);
}

SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
  SDValue Value = ST->getValue();
  if (Value.getOpcode() == ISD::TargetConstantFP)
    return SDValue();

  SDLoc DL(ST);

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();

  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);

  // NOTE: If the original store is volatile, this transform must not increase
  // the number of stores. For example, on x86-32 an f64 can be stored in one
  // processor operation but an i64 (which is not legal) requires two. So the
  // transform should not be done in this case.

  SDValue Tmp;
  switch (CFP->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unknown FP type");
  case MVT::f16:    // We don't do this for these yet.
  case MVT::f80:
  case MVT::f128:
  case MVT::ppcf128:
    return SDValue();
  case MVT::f32:
    if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                            bitcastToAPInt().getZExtValue(), SDLoc(CFP),
                            MVT::i32);
      return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
    }
    return SDValue();
  case MVT::f64:
    if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
         !ST->isVolatile()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
      Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), SDLoc(CFP), MVT::i64);
      return DAG.getStore(Chain, DL, Tmp,
                          Ptr, ST->getMemOperand());
    }

    if (!ST->isVolatile() &&
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      // Many FP stores are not made apparent until after legalize, e.g. for
      // argument passing. Since this is so common, custom legalize the
      // 64-bit integer store into two 32-bit stores.
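      // For example, storing the f64 constant 1.0 (bit pattern
      // 0x3FF0000000000000) becomes a store of i32 0x00000000 at Ptr followed
      // by a store of i32 0x3FF00000 at Ptr+4 on a little-endian target.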
      uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
      SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
      SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      unsigned Alignment = ST->getAlignment();
      MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
      AAMDNodes AAInfo = ST->getAAInfo();

      SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                                 ST->getAlignment(), MMOFlags, AAInfo);
      Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                        DAG.getConstant(4, DL, Ptr.getValueType()));
      Alignment = MinAlign(Alignment, 4U);
      SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
                                 ST->getPointerInfo().getWithOffset(4),
                                 Alignment, MMOFlags, AAInfo);
      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                         St0, St1);
    }

    return SDValue();
  }
}

SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
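  // For example, (store (bitcast v2i32 X to i64), p) can become (store X, p)
  // when the v2i32 store is legal (or we are before legalize operations) and
  // the access is still fast at the original alignment.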
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    APInt TruncDemandedBits =
        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                             ST->getMemoryVT().getScalarSizeInBits());

    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used. For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
    AddToWorklist(Value.getNode());
    if (Shorter)
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
                               ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted). SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile()) {
      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
          ST->getMemoryVT() == ST1->getMemoryVT()) {
        // If this is a store followed by a store with the same value to the
        // same location, then the store is dead/noop.
        return Chain;
      }

      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
        // If the preceding store writes to a subset of the current store's
        // location and no other node is chained to that store, we can
        // effectively drop the store. Do not remove stores to undef as they may
        // be used as data sinks.
        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
          CombineTo(ST1, ST1->getChain());
          return SDValue();
        }

        // If ST stores to a subset of preceding store's write set, we may be
        // able to fold ST's value into the preceding stored value. As we know
        // the other uses of ST1's chain are unconcerned with ST, this folding
        // will not affect those nodes.
        int64_t BitOffset;
        if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
                               BitOffset)) {
          SDValue ChainValue = ST1->getValue();
          if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
            if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
              APInt Val = C1->getAPIntValue();
              APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
              // FIXME: Handle Big-endian mode.
              if (!DAG.getDataLayout().isBigEndian()) {
                Val.insertBits(InsertVal, BitOffset);
                SDValue NewSDVal =
                    DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
                                    C1->isTargetOpcode(), C1->isOpaque());
                SDNode *NewST1 = DAG.UpdateNodeOperands(
                    ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
                    ST1->getOperand(3));
                return CombineTo(ST, SDValue(NewST1, 0));
              }
            }
          }
        } // End ST subset of ST1 case.
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store. We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  // avoid changing the types of some subset of stores due to visit order,
  // preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}

SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
  const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
  if (!LifetimeEnd->hasOffset())
    return SDValue();

  const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
                                        LifetimeEnd->getOffset(), false);

  // We walk up the chains to find stores.
  SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
  while (!Chains.empty()) {
    SDValue Chain = Chains.back();
    Chains.pop_back();
    if (!Chain.hasOneUse())
      continue;
    switch (Chain.getOpcode()) {
    case ISD::TokenFactor:
      for (unsigned Nops = Chain.getNumOperands(); Nops;)
        Chains.push_back(Chain.getOperand(--Nops));
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
      // We can forward past any lifetime start/end that can be proven not to
      // alias the node.
      if (!isAlias(Chain.getNode(), N))
        Chains.push_back(Chain.getOperand(0));
      break;
    case ISD::STORE: {
      StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
      if (ST->isVolatile() || ST->isIndexed())
        break;
      const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
      // If we store purely within object bounds just before its lifetime ends,
      // we can remove the store.
      if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
                                   ST->getMemoryVT().getStoreSizeInBits())) {
        LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
                   dbgs() << "\nwithin LIFETIME_END of : ";
                   LifetimeEndBase.dump(); dbgs() << "\n");
        CombineTo(ST, ST->getChain());
        return SDValue(N, 0);
      }
      break;
    }
    default:
      break;
    }
  }
  return SDValue();
}
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
///   (store (or (zext (bitcast F to i32) to i64),
///              (shl (zext I to i64), 32)), addr)  -->
///   (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged stores can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8},  i32 store --> two i16 stores.
/// For pair of {i8, i8},   i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
///   void goo(const std::pair<int, float> &);
///   hoo() {
///     ...
///     goo(std::make_pair(tmp, ftmp));
///     ...
///   }
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SDValue Val = ST->getValue();
  SDLoc DL(ST);

  // Match OR operand.
  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
    return SDValue();

  // Match SHL operand and get Lower and Higher parts of Val.
  SDValue Op1 = Val.getOperand(0);
  SDValue Op2 = Val.getOperand(1);
  SDValue Lo, Hi;
  if (Op1.getOpcode() != ISD::SHL) {
    std::swap(Op1, Op2);
    if (Op1.getOpcode() != ISD::SHL)
      return SDValue();
  }
  Lo = Op2;
  Hi = Op1.getOperand(0);
  if (!Op1.hasOneUse())
    return SDValue();

  // Match shift amount to HalfValBitSize.
  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
    return SDValue();

  // Lo and Hi are zero-extended from an int with size less than or equal to
  // 32 bits to i64.
  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
      !Lo.getOperand(0).getValueType().isScalarInteger() ||
      Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
      Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
      !Hi.getOperand(0).getValueType().isScalarInteger() ||
      Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
    return SDValue();

  // Use the EVT of low and high parts before bitcast as the input
  // of target query.
  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
                  ? Lo.getOperand(0).getValueType()
                  : Lo.getValueType();
  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
                   ? Hi.getOperand(0).getValueType()
                   : Hi.getValueType();
  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
    return SDValue();

  // Start to split store.
  unsigned Alignment = ST->getAlignment();
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  AAMDNodes AAInfo = ST->getAAInfo();

  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  // Lower value store.
  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                             ST->getAlignment(), MMOFlags, AAInfo);
  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                    DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
  // Higher value store.
  SDValue St1 =
      DAG.getStore(St0, DL, Hi, Ptr,
                   ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
                   Alignment / 2, MMOFlags, AAInfo);
  return St1;
}
/// Convert a disguised subvector insertion into a shuffle:
/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
/// bitcast(shuffle (bitcast V), (extended X), Mask)
/// Note: We do not use an insert_subvector node because that requires a legal
/// subvector type.
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  SDValue InsertVal = N->getOperand(1);
  if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
      !InsertVal.getOperand(0).getValueType().isVector())
    return SDValue();

  SDValue SubVec = InsertVal.getOperand(0);
  SDValue DestVec = N->getOperand(0);
  EVT SubVecVT = SubVec.getValueType();
  EVT VT = DestVec.getValueType();
  unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
  unsigned NumMaskVals = ExtendRatio * NumSrcElts;

  // Step 1: Create a shuffle mask that implements this insert operation. The
  // vector that we are inserting into will be operand 0 of the shuffle, so
  // those elements are just 'i'. The inserted subvector is in the first
  // positions of operand 1 of the shuffle. Example:
  // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
  SmallVector<int, 16> Mask(NumMaskVals);
  for (unsigned i = 0; i != NumMaskVals; ++i) {
    if (i / NumSrcElts == InsIndex)
      Mask[i] = (i % NumSrcElts) + NumMaskVals;
    else
      Mask[i] = i;
  }

  // Bail out if the target can not handle the shuffle we want to create.
  EVT SubVecEltVT = SubVecVT.getVectorElementType();
  EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
  if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
    return SDValue();

  // Step 2: Create a wide vector from the inserted source vector by appending
  // undefined elements. This is the same size as our destination vector.
  SDLoc DL(N);
  SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
  ConcatOps[0] = SubVec;
  SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);

  // Step 3: Shuffle in the padded subvector.
  SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
  SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
  AddToWorklist(PaddedSubV.getNode());
  AddToWorklist(DestVecBC.getNode());
  AddToWorklist(Shuf.getNode());
  return DAG.getBitcast(VT, Shuf);
}
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.isUndef())
    return InVec;

  EVT VT = InVec.getValueType();
  unsigned NumElts = VT.getVectorNumElements();

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC) {
    // If this is variable insert to undef vector, it might be better to splat:
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
    if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
      SmallVector<SDValue, 8> Ops(NumElts, InVal);
      return DAG.getBuildVector(VT, DL, Ops);
    }
    return SDValue();
  }

  // We must know which element is being inserted for folds below here.
  unsigned Elt = IndexC->getZExtValue();
  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }
  assert(Ops.size() == NumElts && "Unexpected vector size");

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, DL, Ops);
}
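
/// Replace an extract_vector_elt of a vector load with a narrow scalar load of
/// just the addressed element, e.g. (illustrative):
///   (i32 (extract_vector_elt (v4i32 (load %p)), 2)) -> (i32 (load %p + 8))
/// This assumes the extract is the only user of the load's value result, which
/// the callers check before invoking this helper.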
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                                  SDValue EltNo,
                                                  LoadSDNode *OriginalLoad) {
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  return SDValue(EVE, 0);
}
/// Transform a vector binary operation into a scalar binary operation by moving
/// the math/logic after an extract element of a vector.
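/// For example (illustrative):
///   (i32 (extract_vector_elt (add v4i32:X, v4i32:C), 1))
///     --> (i32 (add (extract_vector_elt X, 1), (extract_vector_elt C, 1)))
/// where the extract of the constant operand then constant-folds away.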
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
                                       bool LegalOperations) {
  SDValue Vec = ExtElt->getOperand(0);
  SDValue Index = ExtElt->getOperand(1);
  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
  if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
    return SDValue();

  // Targets may want to avoid this to prevent an expensive register transfer.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldScalarizeBinop(Vec))
    return SDValue();

  // Extracting an element of a vector constant is constant-folded, so this
  // transform is just replacing a vector op with a scalar op while moving the
  // extract.
  SDValue Op0 = Vec.getOperand(0);
  SDValue Op1 = Vec.getOperand(1);
  if (isAnyConstantBuildVector(Op0, true) ||
      isAnyConstantBuildVector(Op1, true)) {
    // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
    // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
    SDLoc DL(ExtElt);
    EVT VT = ExtElt->getValueType(0);
    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
  }

  return SDValue();
}
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  SDValue VecOp = N->getOperand(0);
  SDValue Index = N->getOperand(1);
  EVT ScalarVT = N->getValueType(0);
  EVT VecVT = VecOp.getValueType();
  if (VecOp.isUndef())
    return DAG.getUNDEF(ScalarVT);

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  SDLoc DL(N);
  if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      Index == VecOp.getOperand(2)) {
    SDValue Elt = VecOp.getOperand(1);
    return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
  }

  // (vextract (scalar_to_vector val, 0) -> val
  if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = VecOp.getOperand(0);
    if (InOp.getValueType() != ScalarVT) {
      assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
    }
    return InOp;
  }

  // extract_vector_elt of out-of-bounds element -> UNDEF
  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
  unsigned NumElts = VecVT.getVectorNumElements();
  if (IndexC && IndexC->getAPIntValue().uge(NumElts))
    return DAG.getUNDEF(ScalarVT);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VecVT) &&
      (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
    SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (ScalarVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // TODO: These transforms should not require the 'hasOneUse' restriction, but
  // there are regressions on multiple targets without it. We can end up with a
  // mess of scalar and vector code if we reduce only part of the DAG to scalar.
  if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
      VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    unsigned ExtractIndex = IndexC->getZExtValue();
    // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
    unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
    SDValue BCSrc = VecOp.getOperand(0);
    if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);

    if (LegalTypes && BCSrc.getValueType().isInteger() &&
        BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
      // trunc i64 X to i32
      SDValue X = BCSrc.getOperand(0);
      assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
             "Extract element and scalar to vector can't change element type "
             "from FP to integer.");
      unsigned XBitWidth = X.getValueSizeInBits();
      unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
      BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;

      // An extract element return value type can be wider than its vector
      // operand element type. In that case, the high bits are undefined, so
      // it's possible that we may need to extend rather than truncate.
      if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
        assert(XBitWidth % VecEltBitWidth == 0 &&
               "Scalar bitwidth must be a multiple of vector element bitwidth");
        return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
      }
    }
  }

  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
    return BO;

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
    auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
    // Find the new index to extract from.
    int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(ScalarVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < (int)NumElts) {
      SVInVec = VecOp.getOperand(0);
    } else {
      SVInVec = VecOp.getOperand(1);
      OrigElt -= NumElts;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != ScalarVT) {
        assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
      }
      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
                         DAG.getConstant(OrigElt, DL, IndexTy));
    }
  }

  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
  // simplify it based on the (valid) extraction indices.
  if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Use->getOperand(0) == VecOp &&
               isa<ConstantSDNode>(Use->getOperand(1));
      })) {
    APInt DemandedElts = APInt::getNullValue(NumElts);
    for (SDNode *Use : VecOp->uses()) {
      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
      if (CstElt->getAPIntValue().ult(NumElts))
        DemandedElts.setBit(CstElt->getZExtValue());
    }
    if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
      // We simplified the vector operand of this extract element. If this
      // extract is not dead, visit it again so it is folded properly.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // Everything under here is trying to match an extract of a loaded value.
  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  bool BCNumEltsChanged = false;
  EVT ExtVT = VecVT.getVectorElementType();
  EVT LVT = ExtVT;
  if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
    return SDValue();

  if (VecOp.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    EVT BCVT = VecOp.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (NumElts != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    VecOp = VecOp.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // extract (vector load $addr), i --> load $addr + i * size
  if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
      ISD::isNormalLoad(VecOp.getNode()) &&
      !Index->hasPredecessor(VecOp.getNode())) {
    auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
    if (VecLoad && !VecLoad->isVolatile())
      return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations || !IndexC)
    return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
  int Elt = IndexC->getZExtValue();
  LoadSDNode *LN0 = nullptr;
  if (ISD::isNormalLoad(VecOp.getNode())) {
    LN0 = cast<LoadSDNode>(VecOp);
  } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
             VecOp.getOperand(0).getValueType() == ExtVT &&
             ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
  }
  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
    // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
    // =>
    // (load $addr+1*size)

    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    // If the bit convert changed the number of elements, it is unsafe
    // to examine the mask.
    if (BCNumEltsChanged)
      return SDValue();

    // Select the input vector, guarding against out of range extract vector.
    int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
    VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);

    if (VecOp.getOpcode() == ISD::BITCAST) {
      // Don't duplicate a load with other uses.
      if (!VecOp.hasOneUse())
        return SDValue();

      VecOp = VecOp.getOperand(0);
    }
    if (ISD::isNormalLoad(VecOp.getNode())) {
      LN0 = cast<LoadSDNode>(VecOp);
      Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
      Index = DAG.getConstant(Elt, DL, Index.getValueType());
    }
  }

  // Make sure we found a non-volatile load and the extractelement is
  // the only use.
  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
    return SDValue();

  // If Idx was -1 above, Elt is going to be -1, so just return undef.
  if (Elt == -1)
    return DAG.getUNDEF(LVT);

  return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
}
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
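// For example (illustrative, little-endian):
//   (v4i32 build_vector (zext i16:a), (zext i16:b), (zext i16:c), (zext i16:d))
//     --> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))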
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple incoming types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                    isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, DL, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    SDValue In;
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT) ||
      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
    return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getBitcast(VT, BV);
}
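
/// Helper for reduceBuildVecToShuffle: given the VectorMask computed for a
/// BUILD_VECTOR and a pair of source vectors (VecIn1/VecIn2), try to emit a
/// single VECTOR_SHUFFLE that places the extracted elements of that pair into
/// their positions in the result, adjusting the input types to match the
/// output type where possible.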
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx, bool DidSplitVec) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // If we artificially split a vector in two already, then the offsets in the
  // operands will all be based off of VecIn1, even those in VecIn2.
  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
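
/// Match a BUILD_VECTOR in which exactly one operand is a zero-extended
/// element extracted from a vector at a constant index and all other operands
/// are undef, and turn it into a shuffle of that vector with a zero vector,
/// e.g. (illustrative):
///   (v2i32 build_vector (zext (extractelt v4i16:V, 3)), undef)
///     --> (v2i32 bitcast (v4i16 shuffle V, zero, <3,4,u,u>))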
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");

  // First, determine where the build vector is not undef.
  // TODO: We could extend this to handle zero elements as well as undefs.
  int NumBVOps = BV->getNumOperands();
  int ZextElt = -1;
  for (int i = 0; i != NumBVOps; ++i) {
    SDValue Op = BV->getOperand(i);
    if (Op.isUndef())
      continue;
    if (ZextElt == -1)
      ZextElt = i;
    else
      return SDValue();
  }
  // Bail out if there's no non-undef element.
  if (ZextElt == -1)
    return SDValue();

  // The build vector contains some number of undef elements and exactly
  // one other element. That other element must be a zero-extended scalar
  // extracted from a vector at a constant index to turn this into a shuffle.
  // Also, require that the build vector does not implicitly truncate/extend
  // its elements.
  // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
  EVT VT = BV->getValueType(0);
  SDValue Zext = BV->getOperand(ZextElt);
  if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  // The zero-extend must be a multiple of the source size, and we must be
  // building a vector of the same size as the source of the extract element.
  SDValue Extract = Zext.getOperand(0);
  unsigned DestSize = Zext.getValueSizeInBits();
  unsigned SrcSize = Extract.getValueSizeInBits();
  if (DestSize % SrcSize != 0 ||
      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
    return SDValue();

  // Create a shuffle mask that will combine the extracted element with zeros
  // and undefs.
  int ZextRatio = DestSize / SrcSize;
  int NumMaskElts = NumBVOps * ZextRatio;
  SmallVector<int, 32> ShufMask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i) {
    if (i / ZextRatio == ZextElt) {
      // The low bits of the (potentially translated) extracted element map to
      // the source vector. The high bits map to zero. We will use a zero vector
      // as the 2nd source operand of the shuffle, so use the 1st element of
      // that vector (mask value is number-of-elements) for the high bits.
      if (i % ZextRatio == 0)
        ShufMask[i] = Extract.getConstantOperandVal(1);
      else
        ShufMask[i] = NumMaskElts;
    }

    // Undef elements of the build vector remain undef because we initialize
    // the shuffle mask with -1.
  }

  // Turn this into a shuffle with zero if that's legal.
  EVT VecVT = Extract.getOperand(0).getValueType();
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
    return SDValue();

  // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
  // bitcast (shuffle V, ZeroVec, VectorMask)
  SDLoc DL(BV);
  SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
  SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                      ShufMask);
  return DAG.getBitcast(VT, Shuf);
}
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
    return V;

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.
  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  bool DidSplitVec = false;
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);
        DidSplitVec = true;

        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx, DidSplitVec))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
// TODO: Support sign extend?
// TODO: Allow undef elements?
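// For example (illustrative):
//   (v4i32 build_vector (zext (extractelt v8i16:X, 4)), (zext (extractelt X, 5)),
//                       (zext (extractelt X, 6)), (zext (extractelt X, 7)))
//     --> (v4i32 zero_extend (v4i16 extract_subvector X, 4))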
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  if (LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);

  bool FoundZeroExtend = false;
  SDValue Op0 = N->getOperand(0);
  auto checkElem = [&](SDValue Op) -> int64_t {
    unsigned Opc = Op.getOpcode();
    FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
    if ((Op.getOpcode() == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
    return -1;
  };

  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  int64_t Offset = checkElem(Op0);
  if (Offset < 0)
    return SDValue();

  unsigned NumElems = N->getNumOperands();
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);

  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
    return SDValue();

  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
      return SDValue();
  }

  SDLoc DL(N);
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
                     VT, In);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //   (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcasts(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
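
/// If every operand of this CONCAT_VECTORS is a bitcast from a scalar (or
/// undef), rebuild the node as a BUILD_VECTOR of those scalars followed by a
/// single bitcast to the result type, choosing an integer or floating point
/// element type based on the scalars that were seen.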
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.isUndef())
          Op = ScalarUndef;
        else
          Op = DAG.getBitcast(SVT, Op);
      }
    }
  }

  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
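// For example (illustrative):
//   (v8i32 concat_vectors (v4i32 extract_subvector v8i32:A, 4),
//                         (v4i32 extract_subvector v8i32:B, 0))
//     --> (v8i32 vector_shuffle<4,5,6,7,8,9,10,11> A, B)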
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    Op = peekThroughBitcasts(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    SDValue Scalar = peekThroughOneUseBitcasts(In);

    // concat_vectors(scalar_to_vector(scalar), undef) ->
    //     scalar_to_vector(scalar)
    if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         Scalar.hasOneUse()) {
      EVT SVT = Scalar.getValueType().getVectorElementType();
      if (SVT == Scalar.getOperand(0).getValueType())
        Scalar = Scalar.getOperand(0);
    }

    // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
    if (!Scalar.getValueType().isVector()) {
      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar.getValueType();

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR
.getOpcode() != ISD::EXTRACT_SUBVECTOR
)
17382 // Find the single incoming vector for the extract_subvector.
17383 if (SingleSource
.getNode()) {
17384 if (Op
.getOperand(0) != SingleSource
)
17387 SingleSource
= Op
.getOperand(0);
17389 // Check the source type is the same as the type of the result.
17390 // If not, this concat may extend the vector, so we can not
17391 // optimize it away.
17392 if (SingleSource
.getValueType() != N
->getValueType(0))
17396 unsigned IdentityIndex
= i
* PartNumElem
;
17397 ConstantSDNode
*CS
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
17398 // The extract index must be constant.
17402 // Check that we are reading from the identity index.
17403 if (CS
->getZExtValue() != IdentityIndex
)
17407 if (SingleSource
.getNode())
17408 return SingleSource
;
/// If we are extracting a subvector produced by a wide binary operator, try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndexC)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
  if (!ISD::isBinaryOp(BinOp.getNode()))
    return SDValue();

  // The binop must be a vector type, so we can extract some fraction of it.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  EVT VT = Extract->getValueType(0);
  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
         "Extract index is not a multiple of the vector length.");

  // Bail out if this is not a proper multiple width extraction.
  unsigned WideWidth = WideBVT.getSizeInBits();
  unsigned NarrowWidth = VT.getSizeInBits();
  if (WideWidth % NarrowWidth != 0)
    return SDValue();

  // Bail out if we are extracting a fraction of a single operation. This can
  // occur because we potentially looked through a bitcast of the binop.
  unsigned NarrowingRatio = WideWidth / NarrowWidth;
  unsigned WideNumElts = WideBVT.getVectorNumElements();
  if (WideNumElts % NarrowingRatio != 0)
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideNumElts / NarrowingRatio);
  unsigned BOpcode = BinOp.getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // If extraction is cheap, we don't need to look at the binop operands
  // for concat ops. The narrow binop alone makes this transform profitable.
  // We can't just reuse the original extract index operand because we may have
  // bitcasted.
  unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
      BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
    // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
    SDLoc DL(Extract);
    SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
    SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(0), NewExtIndex);
    SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(1), NewExtIndex);
    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
                                      BinOp.getNode()->getFlags());
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  if (NarrowingRatio != 2)
    return SDValue();

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
  SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
  SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
  bool ConcatL =
      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
  bool ConcatR =
      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
  if (ConcatL || ConcatR) {
    // If a binop operand was not the result of a concat, we must extract a
    // half-sized operand for our new narrow binop:
    // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
    // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
    // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
    SDLoc DL(Extract);
    SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
    SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(0), IndexC);

    SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(1), IndexC);

    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  return SDValue();
}
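// Worked example for narrowExtractedVectorBinOp above (illustrative only,
// assuming the target supports the narrow v4i32 logic op):
//   extract_subvector (and (concat_vectors A:v4i32, B), (concat_vectors C, D)), 4
//     --> and B, D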
/// If we are extracting a subvector from a wide vector load, convert to a
/// narrow load to eliminate the extraction:
/// (extract_subvector (load wide vector)) --> (load narrow vector)
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Add support for big-endian. The offset calculation must be adjusted.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();

  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
    return SDValue();

  // Allow targets to opt-out.
  EVT VT = Extract->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
    return SDValue();

  // The narrow load will be offset from the base address of the old load if
  // we are extracting from something besides index 0 (little-endian).
  SDLoc DL(Extract);
  SDValue BaseAddr = Ld->getOperand(1);
  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();

  // TODO: Use "BaseIndexOffset" to make this more effective.
  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
                                                   VT.getStoreSize());
  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return NewLd;
}
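// Worked example for narrowExtractedVectorLoad above (little-endian,
// illustrative types only):
//   extract_subvector (v4i32 load %ptr), 2 --> v2i32 load at (%ptr + 8 bytes)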
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
      return NarrowLoad;

  // Combine an extract of an extract into a single extract_subvector.
  // ext (ext X, C), 0 --> ext X, C
  if (isNullConstant(N->getOperand(1)) &&
      V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse() &&
      isa<ConstantSDNode>(V.getOperand(1))) {
    if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
                                    V.getConstantOperandVal(1)) &&
        TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
                         V.getOperand(1));
    }
  }

  // Combine:
  //    (extract_subvec (concat V1, V2, ...), i)
  // Into:
  //    Vi if possible
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
  // type.
  if (V.getOpcode() == ISD::CONCAT_VECTORS &&
      isa<ConstantSDNode>(N->getOperand(1)) &&
      V.getOperand(0).getValueType() == NVT) {
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  V = peekThroughBitcasts(V);

  // If the input is a build vector, try to make a smaller build vector.
  if (V.getOpcode() == ISD::BUILD_VECTOR) {
    if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      EVT InVT = V.getValueType();
      unsigned ExtractSize = NVT.getSizeInBits();
      unsigned EltSize = InVT.getScalarSizeInBits();
      // Only do this if we won't split any elements.
      if (ExtractSize % EltSize == 0) {
        unsigned NumElems = ExtractSize / EltSize;
        EVT EltVT = InVT.getVectorElementType();
        EVT ExtractVT = NumElems == 1 ? EltVT
                                      : EVT::getVectorVT(*DAG.getContext(),
                                                         EltVT, NumElems);
        if ((Level < AfterLegalizeDAG ||
             (NumElems == 1 ||
              TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
          unsigned IdxVal = Idx->getZExtValue();
          IdxVal *= NVT.getScalarSizeInBits();
          IdxVal /= EltSize;

          if (NumElems == 1) {
            SDValue Src = V->getOperand(IdxVal);
            if (EltVT != Src.getValueType())
              Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
            return DAG.getBitcast(NVT, Src);
          }

          // Extract the pieces from the original build_vector.
          SDValue BuildVec = DAG.getBuildVector(
              ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
          return DAG.getBitcast(NVT, BuildVec);
        }
      }
    }
  }

  if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only the simple case where the vector being inserted and the
    // vector being extracted are of the same size.
    EVT SmallVT = V.getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT))
      return SDValue();

    // Only handle cases where both indexes are constants.
    auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
    if (InsIdx && ExtIdx) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V1
      //    otherwise => (extract_subvec V1, ExtIdx)
      if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
        return DAG.getBitcast(NVT, V.getOperand(1));
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
          DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
          N->getOperand(1));
    }
  }

  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
    return NarrowBOp;

  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
/// followed by concatenation. Narrow vector ops may have better performance
/// than wide ops, and this can unlock further narrowing of other vector ops.
/// Targets can invert this transform later if it is not profitable.
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
                                         SelectionDAG &DAG) {
  SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
  if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
      N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
      !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
    return SDValue();

  // Split the wide shuffle mask into halves. Any mask element that is accessing
  // operand 1 is offset down to account for narrowing of the vectors.
  ArrayRef<int> Mask = Shuf->getMask();
  EVT VT = Shuf->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned HalfNumElts = NumElts / 2;
  SmallVector<int, 16> Mask0(HalfNumElts, -1);
  SmallVector<int, 16> Mask1(HalfNumElts, -1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] == -1)
      continue;
    int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
    if (i < HalfNumElts)
      Mask0[i] = M;
    else
      Mask1[i - HalfNumElts] = M;
  }

  // Ask the target if this is a valid transform.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
                                HalfNumElts);
  if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
      !TLI.isShuffleMaskLegal(Mask1, HalfVT))
    return SDValue();

  // shuffle (concat X, undef), (concat Y, undef), Mask -->
  // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
  SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
  SDLoc DL(Shuf);
  SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
  SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
}
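// Worked example for foldShuffleOfConcatUndefs above (illustrative v8i16
// types; assumes the target accepts both half-width masks):
//   shuffle (concat X:v4i16, undef), (concat Y:v4i16, undef), <0,1,8,9,2,3,10,11>
//     --> concat (shuffle X, Y, <0,1,4,5>), (shuffle X, Y, <2,3,6,7>)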
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into a simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  ArrayRef<int> Mask = SVN->getMask();

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  auto IsUndefMaskElt = [](int i) { return i == -1; };

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
                   IsUndefMaskElt)) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
                              N0.getOperand(1),
                              Mask.slice(0, NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
  for (unsigned I = 0; I != NumConcats; ++I) {
    unsigned Begin = I * NumElemsPerConcat;
    ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);

    // Make sure we're dealing with a copy.
    if (llvm::all_of(SubMask, IsUndefMaskElt)) {
      Ops.push_back(DAG.getUNDEF(ConcatVT));
      continue;
    }

    int OpIdx = -1;
    for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
      if (IsUndefMaskElt(SubMask[i]))
        continue;
      if ((SubMask[i] % (int)NumElemsPerConcat) != i)
        return SDValue();
      int EltOpIdx = SubMask[i] / NumElemsPerConcat;
      if (0 <= OpIdx && EltOpIdx != OpIdx)
        return SDValue();
      OpIdx = EltOpIdx;
    }
    assert(0 <= OpIdx && "Unknown concat_vectors op");

    if (OpIdx < (int)N0.getNumOperands())
      Ops.push_back(N0.getOperand(OpIdx));
    else
      Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
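// Worked example for partitionShuffleOfConcats above (illustrative, with
// v4i32 concat pieces):
//   shuffle (concat A, B), (concat C, D), <4,5,6,7,8,9,10,11>
//     --> concat B, C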
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load. A BUILD_VECTOR where each
// element is identical is a splat. A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  if (!N0->hasOneUse())
    return SDValue();

  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    if (!N1->hasOneUse())
      return SDValue();

    bool N0AnyConst = isAnyConstantBuildVector(N0);
    bool N1AnyConst = isAnyConstantBuildVector(N1);
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        SDValue Op0 = S.getOperand(0);
        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
                                            SelectionDAG &DAG,
                                            const TargetLowering &TLI,
                                            bool LegalOperations) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  ArrayRef<int> Mask = SVN->getMask();
  SDValue N0 = SVN->getOperand(0);

  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
        continue;
      return false;
    }
    return true;
  };

  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
  // power-of-2 extensions as they are the most likely.
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non power of 2 vector sizes
    if (NumElts % Scale != 0)
      continue;
    if (!isAnyExtend(Scale))
      continue;

    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
    // Never create an illegal type. Only create unsupported operations if we
    // are pre-legalization.
    if (TLI.isTypeLegal(OutVT))
      if (!LegalOperations ||
          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
        return DAG.getBitcast(VT,
                              DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
                                          SDLoc(SVN), OutVT, N0));
  }

  return SDValue();
}
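// A second worked example for combineShuffleToVectorExtend above, at a larger
// scale (illustrative; assumes v2i64 is a legal type on the target):
//   v8i16 shuffle X, undef, <0,u,u,u,1,u,u,u>
//     --> bitcast (v2i64 any_extend_vector_inreg (v8i16 X))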
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// then be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));

  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}
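// Worked example for combineTruncationShuffle above (illustrative): the
// truncating shuffle undoes the in-register extension, so both disappear:
//   v4i32 shuffle (bitcast (v2i64 zero_extend_vector_inreg X:v4i32)), undef, <0,2,u,u>
//     --> X  (the final bitcast back to v4i32 is a no-op here)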
// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
                                        SelectionDAG &DAG) {
  if (!Shuf->getOperand(1).isUndef())
    return SDValue();
  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Splat || !Splat->isSplat())
    return SDValue();

  ArrayRef<int> ShufMask = Shuf->getMask();
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
    return Shuf->getOperand(0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : ShufMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}
/// If the shuffle mask is taking exactly one element from the first vector
/// operand and passing through all other elements from the second vector
/// operand, return the index of the mask element that is choosing an element
/// from the first operand. Otherwise, return -1.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
  int MaskSize = Mask.size();
  int EltFromOp0 = -1;
  // TODO: This does not match if there are undef elements in the shuffle mask.
  // Should we ignore undefs in the shuffle mask instead? The trade-off is
  // removing an instruction (a shuffle), but losing the knowledge that some
  // vector lanes are not needed.
  for (int i = 0; i != MaskSize; ++i) {
    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
      // We're looking for a shuffle of exactly one element from operand 0.
      if (EltFromOp0 != -1)
        return -1;
      EltFromOp0 = i;
    } else if (Mask[i] != i + MaskSize) {
      // Nothing from operand 1 can change lanes.
      return -1;
    }
  }
  return EltFromOp0;
}
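// Worked example for getShuffleMaskIndexOfOneElementFromOp0IntoOp1 above
// (illustrative, 4-element mask): for <4,1,6,7>, lanes 0, 2 and 3 pass
// operand 1 through unchanged, and lane 1 selects element 1 of operand 0,
// so the function returns 1.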
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
                                        Op0.getOperand(2).getValueType());
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}
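// Worked example for replaceShuffleOfInsert above (illustrative, 4-element
// vectors):
//   shuffle (insert_vector_elt V1, X, 1), V2, <4,1,6,7>
//     --> insert_vector_elt V2, X, 1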
/// If we have a unary shuffle of a shuffle, see if it can be folded away
/// completely. This has the potential to lose undef knowledge because the first
/// shuffle may not have an undef mask element where the second one does. So
/// only call this after doing simplifications based on demanded elements.
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
  // shuf (shuf0 X, Y, Mask0), undef, Mask
  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Shuf0 || !Shuf->getOperand(1).isUndef())
    return SDValue();

  ArrayRef<int> Mask = Shuf->getMask();
  ArrayRef<int> Mask0 = Shuf0->getMask();
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    // Ignore undef elements.
    if (Mask[i] < 0)
      continue;
    assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");

    // Is the element of the shuffle operand chosen by this shuffle the same as
    // the element chosen by the shuffle operand itself?
    if (Mask0[Mask[i]] != Mask0[i])
      return SDValue();
  }
  // Every element of this shuffle is identical to the result of the previous
  // shuffle, so we can replace this value.
  return Shuf->getOperand(0);
}
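// Worked example for simplifyShuffleOfShuffle above (illustrative): the outer
// shuffle only permutes lanes that the inner mask already made identical, so
// it can be dropped:
//   shuffle (shuffle X, undef, <0,0,2,2>), undef, <1,0,3,2>
//     --> shuffle X, undef, <0,0,2,2>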
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splatted value can always be folded.
  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
    return V;

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    int SplatIndex = SVN->getSplatIndex();
    if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
        ISD::isBinaryOp(N0.getNode())) {
      // splat (vector_bo L, R), Index -->
      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
      SDLoc DL(N);
      EVT EltVT = VT.getScalarType();
      SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
      SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
                                  N0.getNode()->getFlags());
      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
    }

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it. Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    SDNode *V = N0.getNode();
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      SDValue Splatted = V->getOperand(SplatIndex);
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // This is intentionally placed after demanded elements simplification because
  // it could eliminate knowledge of undef elements created by this shuffle.
  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
    return ShufOp;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    SDValue BC0 = peekThroughOneUseBitcasts(N0);
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
    return V;

  return SDValue();
}
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate, do the truncate here as long as it's
      // legal; if it's not legal, the fold is simply skipped.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        SDValue Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
          TLI.isShuffleMaskLegal(NewMask, VT)) {
        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
                                   DAG.getUNDEF(InVecT), NewMask);
        // If the initial vector is the correct size this shuffle is a
        // valid result.
        if (VT == InVecT)
          return Val;
        // If not we must truncate the vector.
        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
          EVT SubVT =
              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
                               VT.getVectorNumElements());
          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
                            ZeroIdx);
          return Val;
        }
      }
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N1 and N2 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // Eliminate an intermediate insert into an undef vector:
  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
  // insert_subvector undef, X, N2
  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
                       N1.getOperand(1), N2);

  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();

    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  // Simplify source operands based on insertion.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::FP16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}
SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  if (N0->getOpcode() == ISD::AND) {
    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
                         N0.getOperand(0));
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N0.getValueType();
  unsigned Opcode = N->getOpcode();

  // VECREDUCE over 1-element vector is just an extract.
  if (VT.getVectorNumElements() == 1) {
    SDLoc dl(N);
    SDValue Res = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
        DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
    if (Res.getValueType() != N->getValueType(0))
      Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
    return Res;
  }

  // On a boolean vector an and/or reduction is the same as a umin/umax
  // reduction. Convert them if the latter is legal while the former isn't.
  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
    unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
        ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
    if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
        TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
        DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
      return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
  }

  return SDValue();
}
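// Worked examples for visitVECREDUCE above (illustrative):
//   vecreduce_add (v1i32 X) --> extract_vector_elt X, 0
//   vecreduce_or  (v4i1 C)  --> vecreduce_umax (v4i1 C)
// The second fold only fires when vecreduce_or is not legal, vecreduce_umax
// is, and every element is known to consist entirely of sign bits.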
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
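//
// Illustrative note (not from the original source): for two splat operands,
//   add (splat X at lane I), (splat Y at lane I)
// this extracts lane I from each operand, performs one scalar add, and then
// rebuilds either a splat of the scalar result or, when all but one lane is
// undef, a build_vector with the result only in lane I.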
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // TODO: Remove/replace the extract cost check? If the elements are available
  //       as scalars, then there may be no extract cost. Should we ask if
  //       inserting a scalar back into a vector is cheap instead?
  int Index0, Index1;
  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
  SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
  if (!Src0 || !Src1 || Index0 != Index1 ||
      Src0.getValueType().getVectorElementType() != EltVT ||
      Src1.getValueType().getVectorElementType() != EltVT ||
      !TLI.isExtractVecEltCheap(VT, Index0) ||
      !TLI.isOperationLegalOrCustom(Opcode, EltVT))
    return SDValue();

  SDLoc DL(N);
  SDValue IndexC =
      DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());

  // If all lanes but 1 are undefined, no need to splat the scalar result.
  // TODO: Keep track of undefs and use that info in the general case.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
      count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
      count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
    // build_vec ..undef, (bo X, Y), undef...
    SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
    Ops[Index0] = ScalarBO;
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
  SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
  return DAG.getBuildVector(VT, DL, Ops);
}
/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};
  EVT VT = N->getValueType(0);
  unsigned Opcode = N->getOpcode();

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
          Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
    return Fold;

  // Move unary shuffles with identical masks after a vector binop:
  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
  //   --> shuffle (VBinOp A, B), Undef, Mask
  // This does not require type legality checks because we are creating the
  // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
  // though. This code is adapted from the identical transform in instcombine.
  if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
      Opcode != ISD::UREM && Opcode != ISD::SREM &&
      Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
    auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
    auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
    if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
        LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
        (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
      SDLoc DL(N);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
                                     RHS.getOperand(0), N->getFlags());
      SDValue UndefV = LHS.getOperand(1);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
    }
  }

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of insertion may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
      RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
      LHS.getOperand(2) == RHS.getOperand(2) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    SDValue X = LHS.getOperand(1);
    SDValue Y = RHS.getOperand(1);
    SDValue Z = LHS.getOperand(2);
    EVT NarrowVT = X.getValueType();
    if (NarrowVT == Y.getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
      // (binop undef, undef) may not return undef, so compute that result.
      SDLoc DL(N);
      SDValue VecC =
          DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
      SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
    }
  }

  if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
    return V;

  return SDValue();
}
SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorklist(SETCC.getNode());
      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                           SCC.getOperand(2), SCC.getOperand(3));
    }

    return SCC;
  }
  return SDValue();
}
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select. Callers of this
/// should assume that TheSelect is deleted if this returns true. As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }

      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information. This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations. Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // We can't produce a CMOV of a TargetFrameIndex since we won't
        // generate the address generation required.
        LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // The loads must not depend on one another.
    if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
      return false;

    // Check that the select condition doesn't reach either load. If so,
    // folding this will induce a cycle into the DAG. If not, this is safe to
    // xform, so create a select of the addresses.

    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;

    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all Nodes in question so we need not search past it.

    Visited.insert(TheSelect);
    Worklist.push_back(LLD);
    Worklist.push_back(RLD);

    if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
        SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
      return false;

    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondNode}. As we've already compared the
      // Loads, we only need to check if CondNode is a successor to one of the
      // loads. We can further avoid this if there's no use of their chain
      // value.
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      Worklist.push_back(CondNode);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else { // Otherwise SELECT_CC
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
      // the Loads, we only need to check if CondLHS/CondRHS is a successor to
      // one of the loads. We can further avoid this if there's no use of their
      // chain value.
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
      Worklist.push_back(CondLHS);
      Worklist.push_back(CondRHS);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain. We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
/// mask.
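//
// Worked example (illustrative, not from the original source): with i32 X and
// a single-bit constant A = 8 (bit 3),
//   select_cc setlt X, 0, 8, 0
// can be lowered to
//   and (srl X, 28), 8
// because shifting the sign bit (bit 31) down to bit 3 and masking with A
// yields 8 when X is negative and 0 otherwise. The general (non-single-bit)
// form instead uses sra by size(X)-1 to build an all-ones/all-zeros mask.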
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
                                            SDValue N1, SDValue N2, SDValue N3,
                                            ISD::CondCode CC) {
  // If this is a select where the false operand is zero and the compare is a
  // check of the sign bit, see if we can perform the "gzip trick":
  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
  EVT XType = N0.getValueType();
  EVT AType = N2.getValueType();
  if (!isNullConstant(N3) || !XType.bitsGE(AType))
    return SDValue();

  // If the comparison is testing for a positive value, we have to invert
  // the sign bit mask, so only do that transform if the target has a bitwise
  // 'and not' instruction (the invert is free).
  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
    // (X > -1) ? A : 0
    // (X > 0) ? X : 0 <-- This is canonical signed max.
    if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
      return SDValue();
  } else if (CC == ISD::SETLT) {
    // (X < 0) ? A : 0
    // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
    if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
      return SDValue();
  } else {
    return SDValue();
  }

  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
  // constant.
  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
    unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
    SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
    AddToWorklist(Shift.getNode());

    if (XType.bitsGT(AType)) {
      Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
      AddToWorklist(Shift.getNode());
    }

    if (CC == ISD::SETGT)
      Shift = DAG.getNOT(DL, Shift, AType);

    return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
  }

  SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
  AddToWorklist(Shift.getNode());

  if (XType.bitsGT(AType)) {
    Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
    AddToWorklist(Shift.getNode());
  }

  if (CC == ISD::SETGT)
    Shift = DAG.getNOT(DL, Shift, AType);

  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
/// because it replaces two constant pool loads with one.
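//
// Illustrative note (not from the original source): the constant pool array is
// built as { FV, TV }, so an offset of 0 selects the "false" value and an
// offset of one element size selects the "true" value; a single load through
// the selected address then replaces two separate FP-constant loads.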
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
    const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
    ISD::CondCode CC) {
  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
    return SDValue();

  // If we are before legalize types, we want the other legalization to happen
  // first (for example, to avoid messing with soft float).
  auto *TV = dyn_cast<ConstantFPSDNode>(N2);
  auto *FV = dyn_cast<ConstantFPSDNode>(N3);
  EVT VT = N2.getValueType();
  if (!TV || !FV || !TLI.isTypeLegal(VT))
    return SDValue();

  // If a constant can be materialized without loads, this does not make sense.
  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
      TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
    return SDValue();

  // If both constants have multiple uses, then we won't need to do an extra
  // load. The values are likely around in registers for other users.
  if (!TV->hasOneUse() && !FV->hasOneUse())
    return SDValue();

  Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
                       const_cast<ConstantFP*>(TV->getConstantFPValue()) };
  Type *FPTy = Elts[0]->getType();
  const DataLayout &TD = DAG.getDataLayout();

  // Create a ConstantArray of the two constants.
  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                      TD.getPrefTypeAlignment(FPTy));
  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

  // Get offsets to the 0 and 1 elements of the array, so we can select between
  // them.
  SDValue Zero = DAG.getIntPtrConstant(0, DL);
  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
  SDValue Cond =
      DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
  AddToWorklist(Cond.getNode());
  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
  AddToWorklist(CstOffset.getNode());
  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
  AddToWorklist(CPIdx.getNode());
  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                     MachinePointerInfo::getConstantPool(
                         DAG.getMachineFunction()), Alignment);
}
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT CmpOpVT = N0.getValueType();
  EVT CmpResVT = getSetCCResultType(CmpOpVT);
  EVT VT = N2.getValueType();
  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant.
  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
    AddToWorklist(SCC.getNode());
    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
      // fold select_cc true, x, y -> x
      // fold select_cc false, x, y -> y
      return !(SCCC->isNullValue()) ? N2 : N3;
    }
  }

  if (SDValue V =
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
    return V;

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register. Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is
      // either all-ones, or zero.
      SDValue ShrAmt =
          DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                          getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {

    if (Swap) {
      CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
      if (VT.bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    } else {
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    if (N2C->isOne())
      return Temp;

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
                                       SDLoc(Temp),
                                       getShiftAmountTy(Temp.getValueType())));
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &DL,
                                   bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, Level, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
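//
// Illustrative note (not from the original source): a signed divide of a
// 32-bit value by 3 can be rewritten as the high 32 bits of the 64-bit product
// X * 0x55555556, followed by a small sign correction, which avoids an sdiv
// instruction entirely. The actual magic constants and shifts are computed by
// TLI.BuildSDIV below.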
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // when optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
/// DAG expression that will generate the same value by right shifting.
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // when optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
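//
// Illustrative note (not from the original source): for a 32-bit element and
// V == 8, ctlz(8) == 28, so LogBase2 == 31 - 28 == 3. This yields the exact
// log2 when V is a power of 2, which is how callers use it.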
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
  EVT VT = V.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
  return LogBase2;
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = A X - 1 [which has a zero at X = 1/A]
///     =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///     does not require additional intermediate precision]
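//
// Worked example (illustrative, not from the original source): approximating
// 1/4 from an initial estimate X_0 = 0.2:
//   X_1 = 0.2  + 0.2  * (1 - 4 * 0.2)  = 0.24
//   X_2 = 0.24 + 0.24 * (1 - 4 * 0.24) = 0.2496
// Each step roughly doubles the number of correct bits, so the number of
// refinement steps needed depends on the accuracy of the hardware estimate.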
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      SDLoc DL(Op);
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (1 - Arg * Est)
      for (int i = 0; i < Iterations; ++i) {
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    }
    return Est;
  }

  return SDValue();
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
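//
// Worked example (illustrative, not from the original source): approximating
// 1/sqrt(4) from an initial estimate X_0 = 0.6:
//   X_1 = 0.6   * (1.5 - 4 * 0.6^2   / 2) = 0.6   * 0.78      = 0.468
//   X_2 = 0.468 * (1.5 - 4 * 0.468^2 / 2) = 0.468 * 1.061952 ~= 0.497
// converging towards 0.5.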
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
    AddToWorklist(Est.getNode());
  }

  // If non-reciprocal square root is requested, multiply the result by Arg.
  if (!Reciprocal) {
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
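//
// Illustrative note (not from the original source): this is algebraically the
// same update as in buildSqrtNROneConst, just refactored around the constants
// -0.5 and -3.0:
//   (-0.5 * X) * (A * X * X - 3.0) = X * (1.5 - A * X^2 / 2)
// which lets the final non-reciprocal step reuse the (A * E) subexpression.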
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  //   E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  bool UseOneConstNR = false;
  if (SDValue Est =
          TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                              Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
                ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
                : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
          AddToWorklist(Fabs.getNode());
          AddToWorklist(IsDenorm.getNode());
          AddToWorklist(Est.getNode());
        } else {
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
          AddToWorklist(IsZero.getNode());
          AddToWorklist(Est.getNode());
        }
      }
    }
    return Est;
  }

  return SDValue();
}
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, true);
}

SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {

  struct MemUseCharacteristics {
    bool IsVolatile;
    SDValue BasePtr;
    int64_t Offset;
    Optional<int64_t> NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
    if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
        Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
                     ? C->getSExtValue()
                     : (LSN->getAddressingMode() == ISD::PRE_DEC)
                           ? -1 * C->getSExtValue()
                           : 0;
      return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
              LSN->getMemOperand()};
    }
    if (const auto *LN = cast<LifetimeSDNode>(N))
      return {false /*isVolatile*/, LN->getOperand(1),
              (LN->hasOffset()) ? LN->getOffset() : 0,
              (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
                                : Optional<int64_t>(),
              (MachineMemOperand *)nullptr};
    return {false /*isVolatile*/, SDValue(), (int64_t)0 /*offset*/,
            Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
  };

  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
                        MUC1 = getCharacteristics(Op1);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  bool IsAlias;
  if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
                                       DAG, IsAlias))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
  // either are not known.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
      (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
  unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
      *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
        (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
      return false;
  }

  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();

  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;

  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
    AliasResult AAResult = AA->alias(
        MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                       UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
        MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                       UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();

  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Attempt to improve chain by a single step
  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      !cast<LSBaseSDNode>(C.getNode())->isVolatile();
      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      return false;
    }
  };

  // Look at each chain and determine if it is an alias. If so, add it to the
  // aliases list. If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up. Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }

    if (ImproveChain(Chain)) {
      // Updated Chain Found, Consider new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      continue;
    }
    // No Improved Chain Possible, treat as Alias.
    Aliases.push_back(Chain);
  }
}
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  if (OptLevel == CodeGenOpt::None)
    return OldChain;

  // Ops for replacing token factor.
  SmallVector<SDValue, 8> Aliases;

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it. We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getTokenFactor(SDLoc(N), Aliases);
}
namespace {

// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }

} // end anonymous namespace
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
*St
) {
19996 SmallVector
<StoreSDNode
*, 8> ChainedStores
;
19997 StoreSDNode
*STChain
= St
;
19998 // Intervals records which offsets from BaseIndex have been covered. In
19999 // the common case, every store writes to the immediately previous address
20000 // space and thus merged with the previous interval at insertion time.
20003 llvm::IntervalMap
<int64_t, UnitT
, 8, IntervalMapHalfOpenInfo
<int64_t>>;
20007 // This holds the base pointer, index, and the offset in bytes from the base
20009 const BaseIndexOffset BasePtr
= BaseIndexOffset::match(St
, DAG
);
20011 // We must have a base and an offset.
20012 if (!BasePtr
.getBase().getNode())
20015 // Do not handle stores to undef base pointers.
20016 if (BasePtr
.getBase().isUndef())
20019 // Add ST's interval.
20020 Intervals
.insert(0, (St
->getMemoryVT().getSizeInBits() + 7) / 8, Unit
);
20022 while (StoreSDNode
*Chain
= dyn_cast
<StoreSDNode
>(STChain
->getChain())) {
20023 // If the chain has more than one use, then we can't reorder the mem ops.
20024 if (!SDValue(Chain
, 0)->hasOneUse())
20026 if (Chain
->isVolatile() || Chain
->isIndexed())
20029 // Find the base pointer and offset for this memory node.
20030 const BaseIndexOffset Ptr
= BaseIndexOffset::match(Chain
, DAG
);
20031 // Check that the base pointer is the same as the original one.
20033 if (!BasePtr
.equalBaseIndex(Ptr
, DAG
, Offset
))
20035 int64_t Length
= (Chain
->getMemoryVT().getSizeInBits() + 7) / 8;
20036 // Make sure we don't overlap with other intervals by checking the ones to
20037 // the left or right before inserting.
20038 auto I
= Intervals
.find(Offset
);
20039 // If there's a next interval, we should end before it.
20040 if (I
!= Intervals
.end() && I
.start() < (Offset
+ Length
))
20042 // If there's a previous interval, we should start after it.
20043 if (I
!= Intervals
.begin() && (--I
).stop() <= Offset
)
20045 Intervals
.insert(Offset
, Offset
+ Length
, Unit
);
20047 ChainedStores
.push_back(Chain
);
20051 // If we didn't find a chained store, exit.
20052 if (ChainedStores
.size() == 0)
20055 // Improve all chained stores (St and ChainedStores members) starting from
20056 // where the store chain ended and return single TokenFactor.
20057 SDValue NewChain
= STChain
->getChain();
20058 SmallVector
<SDValue
, 8> TFOps
;
20059 for (unsigned I
= ChainedStores
.size(); I
;) {
20060 StoreSDNode
*S
= ChainedStores
[--I
];
20061 SDValue BetterChain
= FindBetterChain(S
, NewChain
);
20062 S
= cast
<StoreSDNode
>(DAG
.UpdateNodeOperands(
20063 S
, BetterChain
, S
->getOperand(1), S
->getOperand(2), S
->getOperand(3)));
20064 TFOps
.push_back(SDValue(S
, 0));
20065 ChainedStores
[I
] = S
;
20068 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
20069 SDValue BetterChain
= FindBetterChain(St
, NewChain
);
20071 if (St
->isTruncatingStore())
20072 NewST
= DAG
.getTruncStore(BetterChain
, SDLoc(St
), St
->getValue(),
20073 St
->getBasePtr(), St
->getMemoryVT(),
20074 St
->getMemOperand());
20076 NewST
= DAG
.getStore(BetterChain
, SDLoc(St
), St
->getValue(),
20077 St
->getBasePtr(), St
->getMemOperand());
20079 TFOps
.push_back(NewST
);
20081 // If we improved every element of TFOps, then we've lost the dependence on
20082 // NewChain to successors of St and we need to add it back to TFOps. Do so at
20083 // the beginning to keep relative order consistent with FindBetterChains.
20084 auto hasImprovedChain
= [&](SDValue ST
) -> bool {
20085 return ST
->getOperand(0) != NewChain
;
20087 bool AddNewChain
= llvm::all_of(TFOps
, hasImprovedChain
);
20089 TFOps
.insert(TFOps
.begin(), NewChain
);
20091 SDValue TF
= DAG
.getTokenFactor(SDLoc(STChain
), TFOps
);
20094 AddToWorklist(STChain
);
20095 // Add TF operands worklist in reverse order.
20096 for (auto I
= TF
->getNumOperands(); I
;)
20097 AddToWorklist(TF
->getOperand(--I
).getNode());
20098 AddToWorklist(TF
.getNode());
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Directly improve a chain of disjoint stores starting at St.
  if (parallelizeChainedStores(St))
    return true;

  // Improve St's chain.
  SDValue BetterChain = FindBetterChain(St, St->getChain());
  if (St->getChain() != BetterChain) {
    replaceStoreChain(St, BetterChain);
    return true;
  }
  return false;
}
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
  /// This is the main entry point to this class.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}