//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));
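
/// When this option is enabled, the combiner may split the address computation
/// out of an indexed load (see SplitIndexingFromLoad below); roughly, an
/// indexed load whose loaded value is unused can be replaced by the add that
/// produces the updated address.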
static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                  cl::desc("DAG combiner may split indexing from loads"));

namespace {

class DAGCombiner {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  CombineLevel Level;
  CodeGenOpt::Level OptLevel;
  bool LegalOperations = false;
  bool LegalTypes = false;
  bool ForCodeSize;

  /// Worklist of all of the nodes that need to be simplified.
  ///
  /// This must behave as a stack -- new nodes to process are pushed onto the
  /// back and when processing we pop off of the back.
  ///
  /// The worklist will not contain duplicates but may contain null entries
  /// due to nodes being deleted from the underlying DAG.
  SmallVector<SDNode *, 64> Worklist;

  /// Mapping from an SDNode to its position on the worklist.
  ///
  /// This is used to find and remove nodes from the worklist (by nulling
  /// them) when they are deleted from the underlying DAG. It relies on
  /// stable indices of nodes within the worklist.
  DenseMap<SDNode *, unsigned> WorklistMap;

  /// This records all nodes attempted to be added to the worklist since we
  /// considered a new worklist entry. Since we do not add duplicate nodes
  /// to the worklist, this is different from the tail of the worklist.
  SmallSetVector<SDNode *, 32> PruningList;

  /// Set of nodes which have been combined (at least once).
  ///
  /// This is used to allow us to reliably add any operands of a DAG node
  /// which have not yet been combined to the worklist.
  SmallPtrSet<SDNode *, 32> CombinedNodes;

  // AA - Used for DAG load/store alias analysis.
  AliasAnalysis *AA;

  /// When an instruction is simplified, add all users of the instruction to
  /// the worklist because they might be simplified further now.
  void AddUsersToWorklist(SDNode *N) {
    for (SDNode *Node : N->uses())
      AddToWorklist(Node);
  }

  // Prune potentially dangling nodes. This is called after any visit to a
  // node, but should also be called during a visit after any failed combine
  // which may have created a DAG node.
  void clearAddedDanglingWorklistEntries() {
    // Check any nodes added to the worklist to see if they are prunable.
    while (!PruningList.empty()) {
      auto *N = PruningList.pop_back_val();
      if (N->use_empty())
        recursivelyDeleteUnusedNodes(N);
    }
  }

  SDNode *getNextWorklistEntry() {
    // Before we do any work, remove nodes that are not in use.
    clearAddedDanglingWorklistEntries();
    SDNode *N = nullptr;
    // The Worklist holds the SDNodes in order, but it may contain null
    // entries.
    while (!N && !Worklist.empty()) {
      N = Worklist.pop_back_val();
    }

    if (N) {
      bool GoodWorklistEntry = WorklistMap.erase(N);
      (void)GoodWorklistEntry;
      assert(GoodWorklistEntry &&
             "Found a worklist entry without a corresponding map entry!");
    }
    return N;
  }

  /// Call the node-specific routine that folds each particular type of node.
  SDValue visit(SDNode *N);

public:
  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), AA(AA) {
    ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
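
    // Find the widest legal simple type; store merging uses
    // MaximumLegalStoreInBits as an upper bound on the width of merged stores.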
    MaximumLegalStoreInBits = 0;
    for (MVT VT : MVT::all_valuetypes())
      if (EVT(VT).isSimple() && VT != MVT::Other &&
          TLI.isTypeLegal(EVT(VT)) &&
          VT.getSizeInBits() >= MaximumLegalStoreInBits)
        MaximumLegalStoreInBits = VT.getSizeInBits();
  }

  void ConsiderForPruning(SDNode *N) {
    // Mark this for potential pruning.
    PruningList.insert(N);
  }

  /// Add to the worklist making sure its instance is at the back (next to be
  /// processed).
  void AddToWorklist(SDNode *N) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Deleted Node added to Worklist");

    // Skip handle nodes as they can't usefully be combined and confuse the
    // zero-use deletion strategy.
    if (N->getOpcode() == ISD::HANDLENODE)
      return;

    ConsiderForPruning(N);
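
    // Only queue the node if it is not already present; recording its index
    // in WorklistMap lets removeFromWorklist null out the entry in constant
    // time rather than scanning the vector.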
    if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
      Worklist.push_back(N);
  }
232 void removeFromWorklist(SDNode *N) {
233 CombinedNodes.erase(N);
234 PruningList.remove(N);
236 auto It = WorklistMap.find(N);
237 if (It == WorklistMap.end())
238 return; // Not in the worklist.
240 // Null out the entry rather than erasing it to avoid a linear operation.
241 Worklist[It->second] = nullptr;
242 WorklistMap.erase(It);
245 void deleteAndRecombine(SDNode *N);
246 bool recursivelyDeleteUnusedNodes(SDNode *N);
248 /// Replaces all uses of the results of one DAG node with new values.
249 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
250 bool AddTo = true);
252 /// Replaces all uses of the results of one DAG node with new values.
253 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
254 return CombineTo(N, &Res, 1, AddTo);
257 /// Replaces all uses of the results of one DAG node with new values.
258 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
259 bool AddTo = true) {
260 SDValue To[] = { Res0, Res1 };
261 return CombineTo(N, To, 2, AddTo);
264 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:
  unsigned MaximumLegalStoreInBits;

  /// Check the specified integer node value to see if it can be simplified or
  /// if things it uses can be simplified by bit propagation.
  /// If so, return true.
  bool SimplifyDemandedBits(SDValue Op) {
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
    return SimplifyDemandedBits(Op, DemandedBits);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
    EVT VT = Op.getValueType();
    unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
  }

  /// Check the specified vector node value to see if it can be simplified or
  /// if things it uses can be simplified as it only uses some of the
  /// elements. If so, return true.
  bool SimplifyDemandedVectorElts(SDValue Op) {
    unsigned NumElts = Op.getValueType().getVectorNumElements();
    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedVectorElts(Op, DemandedElts);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            const APInt &DemandedElts);
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                  bool AssumeSingleUse = false);

  bool CombineToPreIndexedLoadStore(SDNode *N);
  bool CombineToPostIndexedLoadStore(SDNode *N);
  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  bool SliceUpLoad(SDNode *N);

  // Scalars have size 0 to distinguish from singleton vectors.
  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  /// load.
  ///
  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad load that EVE came from to be replaced.
  /// \returns EVE on success, SDValue() on failure.
  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                       SDValue EltNo,
                                       LoadSDNode *OriginalLoad);
  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue PromoteIntBinOp(SDValue Op);
  SDValue PromoteIntShiftOp(SDValue Op);
  SDValue PromoteExtend(SDValue Op);
  bool PromoteLoad(SDValue Op);

  /// Call the node-specific routine that knows how to fold each
  /// particular type of node. If that doesn't do anything, try the
  /// target-specific DAG combines.
  SDValue combine(SDNode *N);

  // Visitation implementation - Implement dag node combining for different
  // node types. The semantics are as follows:
  // Return Value:
  //   SDValue.getNode() == 0 - No change was made
  //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  //   otherwise              - N should be replaced by the returned Operand.
  //

  SDValue visitTokenFactor(SDNode *N);
  SDValue visitMERGE_VALUES(SDNode *N);
  SDValue visitADD(SDNode *N);
  SDValue visitADDLike(SDNode *N);
  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitSUB(SDNode *N);
  SDValue visitADDSAT(SDNode *N);
  SDValue visitSUBSAT(SDNode *N);
  SDValue visitADDC(SDNode *N);
  SDValue visitADDO(SDNode *N);
  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitSUBC(SDNode *N);
  SDValue visitSUBO(SDNode *N);
  SDValue visitADDE(SDNode *N);
  SDValue visitADDCARRY(SDNode *N);
  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
  SDValue visitSUBE(SDNode *N);
  SDValue visitSUBCARRY(SDNode *N);
  SDValue visitMUL(SDNode *N);
  SDValue useDivRem(SDNode *N);
  SDValue visitSDIV(SDNode *N);
  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitUDIV(SDNode *N);
  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitREM(SDNode *N);
  SDValue visitMULHU(SDNode *N);
  SDValue visitMULHS(SDNode *N);
  SDValue visitSMUL_LOHI(SDNode *N);
  SDValue visitUMUL_LOHI(SDNode *N);
  SDValue visitMULO(SDNode *N);
  SDValue visitIMINMAX(SDNode *N);
  SDValue visitAND(SDNode *N);
  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitOR(SDNode *N);
  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitXOR(SDNode *N);
  SDValue SimplifyVBinOp(SDNode *N);
  SDValue visitSHL(SDNode *N);
  SDValue visitSRA(SDNode *N);
  SDValue visitSRL(SDNode *N);
  SDValue visitFunnelShift(SDNode *N);
  SDValue visitRotate(SDNode *N);
  SDValue visitABS(SDNode *N);
  SDValue visitBSWAP(SDNode *N);
  SDValue visitBITREVERSE(SDNode *N);
  SDValue visitCTLZ(SDNode *N);
  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTTZ(SDNode *N);
  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTPOP(SDNode *N);
  SDValue visitSELECT(SDNode *N);
  SDValue visitVSELECT(SDNode *N);
  SDValue visitSELECT_CC(SDNode *N);
  SDValue visitSETCC(SDNode *N);
  SDValue visitSETCCCARRY(SDNode *N);
  SDValue visitSIGN_EXTEND(SDNode *N);
  SDValue visitZERO_EXTEND(SDNode *N);
  SDValue visitANY_EXTEND(SDNode *N);
  SDValue visitAssertExt(SDNode *N);
  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitTRUNCATE(SDNode *N);
  SDValue visitBITCAST(SDNode *N);
  SDValue visitBUILD_PAIR(SDNode *N);
  SDValue visitFADD(SDNode *N);
  SDValue visitFSUB(SDNode *N);
  SDValue visitFMUL(SDNode *N);
  SDValue visitFMA(SDNode *N);
  SDValue visitFDIV(SDNode *N);
  SDValue visitFREM(SDNode *N);
  SDValue visitFSQRT(SDNode *N);
  SDValue visitFCOPYSIGN(SDNode *N);
  SDValue visitFPOW(SDNode *N);
  SDValue visitSINT_TO_FP(SDNode *N);
  SDValue visitUINT_TO_FP(SDNode *N);
  SDValue visitFP_TO_SINT(SDNode *N);
  SDValue visitFP_TO_UINT(SDNode *N);
  SDValue visitFP_ROUND(SDNode *N);
  SDValue visitFP_ROUND_INREG(SDNode *N);
  SDValue visitFP_EXTEND(SDNode *N);
  SDValue visitFNEG(SDNode *N);
  SDValue visitFABS(SDNode *N);
  SDValue visitFCEIL(SDNode *N);
  SDValue visitFTRUNC(SDNode *N);
  SDValue visitFFLOOR(SDNode *N);
  SDValue visitFMINNUM(SDNode *N);
  SDValue visitFMAXNUM(SDNode *N);
  SDValue visitFMINIMUM(SDNode *N);
  SDValue visitFMAXIMUM(SDNode *N);
  SDValue visitBRCOND(SDNode *N);
  SDValue visitBR_CC(SDNode *N);
  SDValue visitLOAD(SDNode *N);

  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

  SDValue visitSTORE(SDNode *N);
  SDValue visitLIFETIME_END(SDNode *N);
  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  SDValue visitBUILD_VECTOR(SDNode *N);
  SDValue visitCONCAT_VECTORS(SDNode *N);
  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  SDValue visitVECTOR_SHUFFLE(SDNode *N);
  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  SDValue visitINSERT_SUBVECTOR(SDNode *N);
  SDValue visitMLOAD(SDNode *N);
  SDValue visitMSTORE(SDNode *N);
  SDValue visitMGATHER(SDNode *N);
  SDValue visitMSCATTER(SDNode *N);
  SDValue visitFP_TO_FP16(SDNode *N);
  SDValue visitFP16_TO_FP(SDNode *N);
  SDValue visitVECREDUCE(SDNode *N);

  SDValue visitFADDForFMACombine(SDNode *N);
  SDValue visitFSUBForFMACombine(SDNode *N);
  SDValue visitFMULForFMADistributiveCombine(SDNode *N);

  SDValue XformToShuffleWithZero(SDNode *N);
  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1);
  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                         SDValue N1, SDNodeFlags Flags);

  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

  SDValue foldSelectOfConstants(SDNode *N);
  SDValue foldVSelectOfConstants(SDNode *N);
  SDValue foldBinOpIntoSelect(SDNode *BO);
  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                           SDValue N2, SDValue N3, ISD::CondCode CC,
                           bool NotExtCompare = false);
  SDValue convertSelectOfFPConstantsToLoadOffset(
      const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
      ISD::CondCode CC);
  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                 SDValue N2, SDValue N3, ISD::CondCode CC);
  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                            const SDLoc &DL);
  SDValue unfoldMaskedMerge(SDNode *N);
  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        const SDLoc &DL, bool foldBooleans);
  SDValue rebuildSetCC(SDValue N);

  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                         SDValue &CC) const;
  bool isOneUseSetCC(SDValue N) const;

  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                     unsigned HiOp);
  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  SDValue CombineExtLoad(SDNode *N);
  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
  SDValue combineRepeatedFPDivisors(SDNode *N);
  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  SDValue BuildSDIV(SDNode *N);
  SDValue BuildSDIVPow2(SDNode *N);
  SDValue BuildUDIV(SDNode *N);
  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                             bool DemandHighBits = true);
  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                            SDValue InnerPos, SDValue InnerNeg,
                            unsigned PosOpcode, unsigned NegOpcode,
                            const SDLoc &DL);
  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
  SDValue MatchLoadCombine(SDNode *N);
  SDValue ReduceLoadWidth(SDNode *N);
  SDValue ReduceLoadOpStoreWidth(SDNode *N);
  SDValue splitMergedValStore(StoreSDNode *ST);
  SDValue TransformFPLoadStorePair(SDNode *N);
  SDValue convertBuildVecZextToZext(SDNode *N);
  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  SDValue reduceBuildVecToShuffle(SDNode *N);
  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                ArrayRef<int> VectorMask, SDValue VecIn1,
                                SDValue VecIn2, unsigned LeftIdx,
                                bool DidSplitVec);
  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

  /// Walk up the chain, skipping non-aliasing memory nodes and looking for
  /// aliasing nodes, adding them to the Aliases vector.
  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                        SmallVectorImpl<SDValue> &Aliases);

  /// Return true if there is any possibility that the two addresses overlap.
  bool isAlias(SDNode *Op0, SDNode *Op1) const;

  /// Walk up the chain, skipping non-aliasing memory nodes and looking for a
  /// better chain (aliasing node).
  SDValue FindBetterChain(SDNode *N, SDValue Chain);

  /// Try to replace a store and any possibly adjacent stores on
  /// consecutive chains with better chains. Return true only if St is
  /// replaced.
  ///
  /// Notice that other chains may still be replaced even if the function
  /// returns false.
  bool findBetterNeighborChains(StoreSDNode *St);

  // Helper for findBetterNeighborChains. Walk up the store chain, adding
  // additional chained stores that do not overlap and can be parallelized.
  bool parallelizeChainedStores(StoreSDNode *St);

  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    // Ptr to the mem node.
    LSBaseSDNode *MemNode;

    // Offset from the base ptr.
    int64_t OffsetFromBase;

    MemOpLink(LSBaseSDNode *N, int64_t Offset)
        : MemNode(N), OffsetFromBase(Offset) {}
  };

  /// This is a helper function for visitMUL to check the profitability
  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  /// MulNode is the original multiply, AddNode is (add x, c1),
  /// and ConstNode is c2.
  bool isMulAddWithConstProfitable(SDNode *MulNode,
                                   SDValue &AddNode,
                                   SDValue &ConstNode);

  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
  /// the type of the loaded value to be extended.
  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                        EVT LoadResultTy, EVT &ExtVT);

  /// Helper function to calculate whether the given Load/Store can have its
  /// width reduced to ExtVT.
  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                         EVT &MemVT, unsigned ShAmt = 0);

  /// Used by BackwardsPropagateMask to find suitable loads.
  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                         SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                         ConstantSDNode *Mask, SDNode *&NodeToMask);
  /// Attempt to propagate a given AND node back to load leaves so that they
  /// can be combined into narrow loads.
  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

  /// Helper function for MergeConsecutiveStores which merges the
  /// component store chains.
  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumStores);

  /// This is a helper function for MergeConsecutiveStores. When the
  /// source elements of the consecutive stores are all constants or
  /// all extracted vector elements, try to merge them into one
  /// larger store introducing bitcasts if necessary. \return True
  /// if a merged store was created.
  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       EVT MemVT, unsigned NumStores,
                                       bool IsConstantSrc, bool UseVector,
                                       bool UseTrunc);

  /// This is a helper function for MergeConsecutiveStores. Stores
  /// that potentially may be merged with St are placed in
  /// StoreNodes. RootNode is a chain predecessor to all store
  /// candidates.
  void getStoreMergeCandidates(StoreSDNode *St,
                               SmallVectorImpl<MemOpLink> &StoreNodes,
                               SDNode *&Root);

  /// Helper function for MergeConsecutiveStores. Checks if
  /// candidate stores have indirect dependency through their
  /// operands. RootNode is the predecessor to all stores calculated
  /// by getStoreMergeCandidates and is used to prune the dependency check.
  /// \return True if safe to merge.
  bool checkMergeStoreCandidatesForDependencies(
      SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
      SDNode *RootNode);

  /// Merge consecutive store operations into a wide store.
  /// This optimization uses wide integers or vectors when possible.
  /// \return true if stores were merged into a wider store (the affected
  /// nodes are stored as a prefix in \p StoreNodes).
  bool MergeConsecutiveStores(StoreSDNode *St);

  /// Try to transform a truncation where C is a constant:
  ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
  ///
  /// \p N needs to be a truncation and its first operand an AND. Other
  /// requirements are checked by the function (e.g. that trunc is
  /// single-use); if they are not met, an empty SDValue is returned.
  SDValue distributeTruncateThroughAnd(SDNode *N);

  /// Helper function to determine whether the target supports the operation
  /// given by \p Opcode for type \p VT, that is, whether the operation
  /// is legal or custom before legalizing operations, and whether it is
  /// legal (but not custom) after legalization.
  bool hasOperation(unsigned Opcode, EVT VT) {
    if (LegalOperations)
      return TLI.isOperationLegal(Opcode, VT);
    return TLI.isOperationLegalOrCustom(Opcode, VT);
  }

public:
  /// Runs the dag combiner on all nodes in the worklist.
  void Run(CombineLevel AtLevel);

  SelectionDAG &getDAG() const { return DAG; }

  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  }

  /// This method returns true if we are running before type legalization or
  /// if the specified VT is legal.
  bool isTypeLegal(const EVT &VT) {
    if (!LegalTypes) return true;
    return TLI.isTypeLegal(VT);
  }

  /// Convenience wrapper around TargetLowering::getSetCCResultType.
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }

  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                       SDValue OrigLoad, SDValue ExtLoad,
                       ISD::NodeType ExtType);
};

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes
  // exponential compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
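
// Note: DAGCombinerInfo::DC is an opaque pointer in the TargetLowering
// interface; within this file it always wraps the DAGCombiner instance,
// hence the casts below.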
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

/// Return 1 if we can compute the negated form of the specified expression
/// for the same cost as the expression itself, 2 if we can compute the
/// negated form more cheaply than the expression itself, or 0 if negation
/// is not free.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               bool ForCodeSize,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  if (!Op.hasOneUse())
    if (!(Op.getOpcode() == ISD::FP_EXTEND &&
          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
      return 0;

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return 0;
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target
    // says the negated constant is legal.
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                            ForCodeSize);
  }
  case ISD::FADD:
    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->NoSignedZerosFPMath &&
        !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
  }
}

/// If isNegatibleForFree returns a non-zero result, return the newly negated
/// expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, bool ForCodeSize,
                                    unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
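    // isNegatibleForFree only lets an FSUB through when signed zeros can be
    // ignored, which is what makes the two folds below safe.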
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1));
  case ISD::FP_ROUND:
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1),
                       Op.getOperand(1));
  }
}

// APInts must be the same size for most operations; this helper function
// zero-extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
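// For example (illustrative): an 8-bit LHS and a 16-bit RHS with Offset == 1
// are both zero-extended to 17 bits, leaving one bit of headroom.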
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  // Don't reassociate reductions.
  if (N0->getFlags().hasVectorReduction())
    return SDValue();

  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      // iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      AddToWorklist(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  return SDValue();
}

// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist.
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, 0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
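    // A plain (non-extending) load is rebuilt as an any-extending load of the
    // same memory type, so the value is produced directly in the wider
    // promoted type.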
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it
/// is beneficial. E.g., on x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it
/// is beneficial. E.g., on x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
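    // The value being shifted must be widened to match the promoted type:
    // arithmetic right shifts need the sign bit replicated (sext), logical
    // right shifts need zeros in the new high bits (zext).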
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;
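  // These flags gate the combines below: once the corresponding legalization
  // phase has run, new nodes must use only target-supported operations and
  // types.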
1463 WorklistInserter AddNodes(*this);
1465 // Add all the dag nodes to the worklist.
1466 for (SDNode &Node : DAG.allnodes())
1467 AddToWorklist(&Node);
1469 // Create a dummy node (which is not added to allnodes), that adds a reference
1470 // to the root node, preventing it from being deleted, and tracking any
1471 // changes of the root.
1472 HandleSDNode Dummy(DAG.getRoot());
1474 // While we have a valid worklist entry node, try to combine it.
1475 while (SDNode *N = getNextWorklistEntry()) {
1476 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1477 // N is deleted from the DAG, since they too may now be dead or may have a
1478 // reduced number of uses, allowing other xforms.
1479 if (recursivelyDeleteUnusedNodes(N))
1480 continue;
1482 WorklistRemover DeadNodes(*this);
1484 // If this combine is running after legalizing the DAG, re-legalize any
1485 // nodes pulled off the worklist.
1486 if (Level == AfterLegalizeDAG) {
1487 SmallSetVector<SDNode *, 16> UpdatedNodes;
1488 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1490 for (SDNode *LN : UpdatedNodes) {
1491 AddToWorklist(LN);
1492 AddUsersToWorklist(LN);
1494 if (!NIsValid)
1495 continue;
1498 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1500 // Add any operands of the new node which have not yet been combined to the
1501 // worklist as well. Because the worklist uniques things already, this
1502 // won't repeatedly process the same operand.
1503 CombinedNodes.insert(N);
1504 for (const SDValue &ChildN : N->op_values())
1505 if (!CombinedNodes.count(ChildN.getNode()))
1506 AddToWorklist(ChildN.getNode());
1508 SDValue RV = combine(N);
1510 if (!RV.getNode())
1511 continue;
1513 ++NodesCombined;
1515 // If we get back the same node we passed in, rather than a new node or
1516 // zero, we know that the node must have defined multiple values and
1517 // CombineTo was used. Since CombineTo takes care of the worklist
1518 // mechanics for us, we have no work to do in this case.
1519 if (RV.getNode() == N)
1520 continue;
1522 assert(N->getOpcode() != ISD::DELETED_NODE &&
1523 RV.getOpcode() != ISD::DELETED_NODE &&
1524 "Node was deleted but visit returned new node!");
1526 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1528 if (N->getNumValues() == RV.getNode()->getNumValues())
1529 DAG.ReplaceAllUsesWith(N, RV.getNode());
1530 else {
1531 assert(N->getValueType(0) == RV.getValueType() &&
1532 N->getNumValues() == 1 && "Type mismatch");
1533 DAG.ReplaceAllUsesWith(N, &RV);
1536 // Push the new node and any users onto the worklist
1537 AddToWorklist(RV.getNode());
1538 AddUsersToWorklist(RV.getNode());
1540 // Finally, if the node is now dead, remove it from the graph. The node
1541 // may not be dead if the replacement process recursively simplified to
1542 // something else needing this node. This will also take care of adding any
1543 // operands which have lost a user to the worklist.
1544 recursivelyDeleteUnusedNodes(N);
1547 // If the root changed (e.g. it was a dead load), update the root.
1548 DAG.setRoot(Dummy.getValue());
1549 DAG.RemoveDeadNodes();
1552 SDValue DAGCombiner::visit(SDNode *N) {
1553 switch (N->getOpcode()) {
1554 default: break;
1555 case ISD::TokenFactor: return visitTokenFactor(N);
1556 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1557 case ISD::ADD: return visitADD(N);
1558 case ISD::SUB: return visitSUB(N);
1559 case ISD::SADDSAT:
1560 case ISD::UADDSAT: return visitADDSAT(N);
1561 case ISD::SSUBSAT:
1562 case ISD::USUBSAT: return visitSUBSAT(N);
1563 case ISD::ADDC: return visitADDC(N);
1564 case ISD::SADDO:
1565 case ISD::UADDO: return visitADDO(N);
1566 case ISD::SUBC: return visitSUBC(N);
1567 case ISD::SSUBO:
1568 case ISD::USUBO: return visitSUBO(N);
1569 case ISD::ADDE: return visitADDE(N);
1570 case ISD::ADDCARRY: return visitADDCARRY(N);
1571 case ISD::SUBE: return visitSUBE(N);
1572 case ISD::SUBCARRY: return visitSUBCARRY(N);
1573 case ISD::MUL: return visitMUL(N);
1574 case ISD::SDIV: return visitSDIV(N);
1575 case ISD::UDIV: return visitUDIV(N);
1576 case ISD::SREM:
1577 case ISD::UREM: return visitREM(N);
1578 case ISD::MULHU: return visitMULHU(N);
1579 case ISD::MULHS: return visitMULHS(N);
1580 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1581 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1582 case ISD::SMULO:
1583 case ISD::UMULO: return visitMULO(N);
1584 case ISD::SMIN:
1585 case ISD::SMAX:
1586 case ISD::UMIN:
1587 case ISD::UMAX: return visitIMINMAX(N);
1588 case ISD::AND: return visitAND(N);
1589 case ISD::OR: return visitOR(N);
1590 case ISD::XOR: return visitXOR(N);
1591 case ISD::SHL: return visitSHL(N);
1592 case ISD::SRA: return visitSRA(N);
1593 case ISD::SRL: return visitSRL(N);
1594 case ISD::ROTR:
1595 case ISD::ROTL: return visitRotate(N);
1596 case ISD::FSHL:
1597 case ISD::FSHR: return visitFunnelShift(N);
1598 case ISD::ABS: return visitABS(N);
1599 case ISD::BSWAP: return visitBSWAP(N);
1600 case ISD::BITREVERSE: return visitBITREVERSE(N);
1601 case ISD::CTLZ: return visitCTLZ(N);
1602 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1603 case ISD::CTTZ: return visitCTTZ(N);
1604 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1605 case ISD::CTPOP: return visitCTPOP(N);
1606 case ISD::SELECT: return visitSELECT(N);
1607 case ISD::VSELECT: return visitVSELECT(N);
1608 case ISD::SELECT_CC: return visitSELECT_CC(N);
1609 case ISD::SETCC: return visitSETCC(N);
1610 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1611 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1612 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1613 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1614 case ISD::AssertSext:
1615 case ISD::AssertZext: return visitAssertExt(N);
1616 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1617 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1618 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1619 case ISD::TRUNCATE: return visitTRUNCATE(N);
1620 case ISD::BITCAST: return visitBITCAST(N);
1621 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1622 case ISD::FADD: return visitFADD(N);
1623 case ISD::FSUB: return visitFSUB(N);
1624 case ISD::FMUL: return visitFMUL(N);
1625 case ISD::FMA: return visitFMA(N);
1626 case ISD::FDIV: return visitFDIV(N);
1627 case ISD::FREM: return visitFREM(N);
1628 case ISD::FSQRT: return visitFSQRT(N);
1629 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1630 case ISD::FPOW: return visitFPOW(N);
1631 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1632 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1633 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1634 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1635 case ISD::FP_ROUND: return visitFP_ROUND(N);
1636 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1637 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1638 case ISD::FNEG: return visitFNEG(N);
1639 case ISD::FABS: return visitFABS(N);
1640 case ISD::FFLOOR: return visitFFLOOR(N);
1641 case ISD::FMINNUM: return visitFMINNUM(N);
1642 case ISD::FMAXNUM: return visitFMAXNUM(N);
1643 case ISD::FMINIMUM: return visitFMINIMUM(N);
1644 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1645 case ISD::FCEIL: return visitFCEIL(N);
1646 case ISD::FTRUNC: return visitFTRUNC(N);
1647 case ISD::BRCOND: return visitBRCOND(N);
1648 case ISD::BR_CC: return visitBR_CC(N);
1649 case ISD::LOAD: return visitLOAD(N);
1650 case ISD::STORE: return visitSTORE(N);
1651 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1652 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1653 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1654 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1655 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1656 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1657 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1658 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1659 case ISD::MGATHER: return visitMGATHER(N);
1660 case ISD::MLOAD: return visitMLOAD(N);
1661 case ISD::MSCATTER: return visitMSCATTER(N);
1662 case ISD::MSTORE: return visitMSTORE(N);
1663 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1664 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1665 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1666 case ISD::VECREDUCE_FADD:
1667 case ISD::VECREDUCE_FMUL:
1668 case ISD::VECREDUCE_ADD:
1669 case ISD::VECREDUCE_MUL:
1670 case ISD::VECREDUCE_AND:
1671 case ISD::VECREDUCE_OR:
1672 case ISD::VECREDUCE_XOR:
1673 case ISD::VECREDUCE_SMAX:
1674 case ISD::VECREDUCE_SMIN:
1675 case ISD::VECREDUCE_UMAX:
1676 case ISD::VECREDUCE_UMIN:
1677 case ISD::VECREDUCE_FMAX:
1678 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1680 return SDValue();
1683 SDValue DAGCombiner::combine(SDNode *N) {
1684 SDValue RV = visit(N);
1686 // If nothing happened, try a target-specific DAG combine.
1687 if (!RV.getNode()) {
1688 assert(N->getOpcode() != ISD::DELETED_NODE &&
1689 "Node was deleted but visit returned NULL!");
1691 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1692 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1694 // Expose the DAG combiner to the target combiner impls.
1695 TargetLowering::DAGCombinerInfo
1696 DagCombineInfo(DAG, Level, false, this);
1698 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1702 // If still nothing happened, try promoting the operation.
1703 if (!RV.getNode()) {
1704 switch (N->getOpcode()) {
1705 default: break;
1706 case ISD::ADD:
1707 case ISD::SUB:
1708 case ISD::MUL:
1709 case ISD::AND:
1710 case ISD::OR:
1711 case ISD::XOR:
1712 RV = PromoteIntBinOp(SDValue(N, 0));
1713 break;
1714 case ISD::SHL:
1715 case ISD::SRA:
1716 case ISD::SRL:
1717 RV = PromoteIntShiftOp(SDValue(N, 0));
1718 break;
1719 case ISD::SIGN_EXTEND:
1720 case ISD::ZERO_EXTEND:
1721 case ISD::ANY_EXTEND:
1722 RV = PromoteExtend(SDValue(N, 0));
1723 break;
1724 case ISD::LOAD:
1725 if (PromoteLoad(SDValue(N, 0)))
1726 RV = SDValue(N, 0);
1727 break;
1731 // If N is a commutative binary node, try to eliminate it if the commuted
1732 // version is already present in the DAG.
1733 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1734 N->getNumValues() == 1) {
1735 SDValue N0 = N->getOperand(0);
1736 SDValue N1 = N->getOperand(1);
1738 // Constant operands are canonicalized to RHS.
1739 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1740 SDValue Ops[] = {N1, N0};
1741 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1742 N->getFlags());
1743 if (CSENode)
1744 return SDValue(CSENode, 0);
1748 return RV;
1751 /// Given a node, return its input chain if it has one; otherwise return a null
1752 /// SDValue.
1753 static SDValue getInputChainForNode(SDNode *N) {
1754 if (unsigned NumOps = N->getNumOperands()) {
1755 if (N->getOperand(0).getValueType() == MVT::Other)
1756 return N->getOperand(0);
1757 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1758 return N->getOperand(NumOps-1);
1759 for (unsigned i = 1; i < NumOps-1; ++i)
1760 if (N->getOperand(i).getValueType() == MVT::Other)
1761 return N->getOperand(i);
1763 return SDValue();
1766 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1767 // If N has two operands, where one has an input chain equal to the other,
1768 // the 'other' chain is redundant.
1769 if (N->getNumOperands() == 2) {
1770 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1771 return N->getOperand(0);
1772 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1773 return N->getOperand(1);
1776 // Don't simplify token factors if optnone.
1777 if (OptLevel == CodeGenOpt::None)
1778 return SDValue();
1780 // If the sole user is a token factor, we should make sure we have a
1781 // chance to merge them together. This prevents TF chains from inhibiting
1782 // optimizations.
1783 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1784 AddToWorklist(*(N->use_begin()));
1786 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1787 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1788 SmallPtrSet<SDNode*, 16> SeenOps;
1789 bool Changed = false; // If we should replace this token factor.
1791 // Start out with this token factor.
1792 TFs.push_back(N);
1794 // Iterate through token factors. TFs grows when new token factors are
1795 // encountered. Limit the number of nodes to inline to avoid quadratic
1796 // compile times.
1797 for (unsigned i = 0; i < TFs.size() && Ops.size() <= 2048; ++i) {
1798 SDNode *TF = TFs[i];
1800 // Check each of the operands.
1801 for (const SDValue &Op : TF->op_values()) {
1802 switch (Op.getOpcode()) {
1803 case ISD::EntryToken:
1804 // Entry tokens don't need to be added to the list. They are
1805 // redundant.
1806 Changed = true;
1807 break;
1809 case ISD::TokenFactor:
1810 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1811 // Queue up for processing.
1812 TFs.push_back(Op.getNode());
1813 // Clean up in case the token factor is removed.
1814 AddToWorklist(Op.getNode());
1815 Changed = true;
1816 break;
1818 LLVM_FALLTHROUGH;
1820 default:
1821 // Only add if it isn't already in the list.
1822 if (SeenOps.insert(Op.getNode()).second)
1823 Ops.push_back(Op);
1824 else
1825 Changed = true;
1826 break;
1831 // Remove nodes that are chained to another node in the list. Do so
1832 // by walking up chains breadth-first, stopping when we've seen
1833 // another operand. In general we must climb to the EntryNode, but we can exit
1834 // early if we find all remaining work is associated with just one operand as
1835 // no further pruning is possible.
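// For concreteness: in TokenFactor(Ld, St), where walking St's chain upward
// reaches Ld, the Ld operand is redundant and can be pruned, because
// ordering against St already transitively orders against Ld.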
1837 // List of nodes to search through and original Ops from which they originate.
1838 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1839 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1840 SmallPtrSet<SDNode *, 16> SeenChains;
1841 bool DidPruneOps = false;
1843 unsigned NumLeftToConsider = 0;
1844 for (const SDValue &Op : Ops) {
1845 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1846 OpWorkCount.push_back(1);
1849 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1850 // If this is an Op, we can remove the op from the list. Re-mark any
1851 // search associated with it as coming from the current OpNumber.
1852 if (SeenOps.count(Op) != 0) {
1853 Changed = true;
1854 DidPruneOps = true;
1855 unsigned OrigOpNumber = 0;
1856 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1857 OrigOpNumber++;
1858 assert((OrigOpNumber != Ops.size()) &&
1859 "expected to find TokenFactor Operand");
1860 // Re-mark worklist from OrigOpNumber to OpNumber
1861 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1862 if (Worklist[i].second == OrigOpNumber) {
1863 Worklist[i].second = OpNumber;
1866 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1867 OpWorkCount[OrigOpNumber] = 0;
1868 NumLeftToConsider--;
1870 // Add if it's a new chain
1871 if (SeenChains.insert(Op).second) {
1872 OpWorkCount[OpNumber]++;
1873 Worklist.push_back(std::make_pair(Op, OpNumber));
1877 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1878 // We need to consider at least 2 Ops to prune.
1879 if (NumLeftToConsider <= 1)
1880 break;
1881 auto CurNode = Worklist[i].first;
1882 auto CurOpNumber = Worklist[i].second;
1883 assert((OpWorkCount[CurOpNumber] > 0) &&
1884 "Node should not appear in worklist");
1885 switch (CurNode->getOpcode()) {
1886 case ISD::EntryToken:
1887 // Hitting EntryToken is the only way for the search to terminate without
1888 // hitting another operand's search. Prevent us from marking this operand
1889 // considered.
1891 NumLeftToConsider++;
1892 break;
1893 case ISD::TokenFactor:
1894 for (const SDValue &Op : CurNode->op_values())
1895 AddToWorklist(i, Op.getNode(), CurOpNumber);
1896 break;
1897 case ISD::LIFETIME_START:
1898 case ISD::LIFETIME_END:
1899 case ISD::CopyFromReg:
1900 case ISD::CopyToReg:
1901 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1902 break;
1903 default:
1904 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1905 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1906 break;
1908 OpWorkCount[CurOpNumber]--;
1909 if (OpWorkCount[CurOpNumber] == 0)
1910 NumLeftToConsider--;
1913 // If we've changed things around then replace token factor.
1914 if (Changed) {
1915 SDValue Result;
1916 if (Ops.empty()) {
1917 // The entry token is the only possible outcome.
1918 Result = DAG.getEntryNode();
1919 } else {
1920 if (DidPruneOps) {
1921 SmallVector<SDValue, 8> PrunedOps;
1923 for (const SDValue &Op : Ops) {
1924 if (SeenChains.count(Op.getNode()) == 0)
1925 PrunedOps.push_back(Op);
1927 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1928 } else {
1929 Result = DAG.getTokenFactor(SDLoc(N), Ops);
1932 return Result;
1934 return SDValue();
1937 /// MERGE_VALUES can always be eliminated.
1938 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1939 WorklistRemover DeadNodes(*this);
1940 // Replacing results may cause a different MERGE_VALUES to suddenly
1941 // be CSE'd with N, and carry its uses with it. Iterate until no
1942 // uses remain, to ensure that the node can be safely deleted.
1943 // First add the users of this node to the work list so that they
1944 // can be tried again once they have new operands.
1945 AddUsersToWorklist(N);
1946 do {
1947 // Do as a single replacement to avoid rewalking use lists.
1948 SmallVector<SDValue, 8> Ops;
1949 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1950 Ops.push_back(N->getOperand(i));
1951 DAG.ReplaceAllUsesWith(N, Ops.data());
1952 } while (!N->use_empty());
1953 deleteAndRecombine(N);
1954 return SDValue(N, 0); // Return N so it doesn't get rechecked!
1957 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1958 /// ConstantSDNode pointer; otherwise return nullptr.
1959 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1960 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1961 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1964 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1965 assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1967 // Don't do this unless the old select is going away. We want to eliminate the
1968 // binary operator, not replace a binop with a select.
1969 // TODO: Handle ISD::SELECT_CC.
1970 unsigned SelOpNo = 0;
1971 SDValue Sel = BO->getOperand(0);
1972 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1973 SelOpNo = 1;
1974 Sel = BO->getOperand(1);
1977 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1978 return SDValue();
1980 SDValue CT = Sel.getOperand(1);
1981 if (!isConstantOrConstantVector(CT, true) &&
1982 !isConstantFPBuildVectorOrConstantFP(CT))
1983 return SDValue();
1985 SDValue CF = Sel.getOperand(2);
1986 if (!isConstantOrConstantVector(CF, true) &&
1987 !isConstantFPBuildVectorOrConstantFP(CF))
1988 return SDValue();
1990 // Bail out if any constants are opaque because we can't constant fold those.
1991 // The exception is "and" and "or" with either 0 or -1, in which case we can
1992 // propagate non-constant operands into the select. I.e.:
1993 // and (select Cond, 0, -1), X --> select Cond, 0, X
1994 // or X, (select Cond, -1, 0) --> select Cond, -1, X
1995 auto BinOpcode = BO->getOpcode();
1996 bool CanFoldNonConst =
1997 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1998 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1999 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2001 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2002 if (!CanFoldNonConst &&
2003 !isConstantOrConstantVector(CBO, true) &&
2004 !isConstantFPBuildVectorOrConstantFP(CBO))
2005 return SDValue();
2007 EVT VT = Sel.getValueType();
2009 // In the case of a shift, the value and the shift amount may have different
2010 // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
2011 // type. Bail out if we have swapped operands and the value types do not
2012 // match. NB: x86 is fine if the operands are not swapped, with the shift
2013 // amount VT being no bigger than the shifted value. TODO: it is possible to
2014 // check for a shift, correct the VTs, and still optimize on x86 if needed.
2015 if (SelOpNo && VT != CBO.getValueType())
2016 return SDValue();
2018 // We have a select-of-constants followed by a binary operator with a
2019 // constant. Eliminate the binop by pulling the constant math into the select.
2020 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
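// For instance, add (select Cond, 0, 7), 4 --> select Cond, 4, 11, which
// removes the add entirely.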
2021 SDLoc DL(Sel);
2022 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2023 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2024 if (!CanFoldNonConst && !NewCT.isUndef() &&
2025 !isConstantOrConstantVector(NewCT, true) &&
2026 !isConstantFPBuildVectorOrConstantFP(NewCT))
2027 return SDValue();
2029 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2030 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2031 if (!CanFoldNonConst && !NewCF.isUndef() &&
2032 !isConstantOrConstantVector(NewCF, true) &&
2033 !isConstantFPBuildVectorOrConstantFP(NewCF))
2034 return SDValue();
2036 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2039 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2040 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2041 "Expecting add or sub");
2043 // Match a constant operand and a zext operand for the math instruction:
2044 // add Z, C
2045 // sub C, Z
2046 bool IsAdd = N->getOpcode() == ISD::ADD;
2047 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2048 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2049 auto *CN = dyn_cast<ConstantSDNode>(C);
2050 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2051 return SDValue();
2053 // Match the zext operand as a setcc of a boolean.
2054 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2055 Z.getOperand(0).getValueType() != MVT::i1)
2056 return SDValue();
2058 // Match the compare as: setcc (X & 1), 0, eq.
2059 SDValue SetCC = Z.getOperand(0);
2060 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2061 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2062 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2063 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2064 return SDValue();
2066 // We are adding/subtracting a constant and an inverted low bit. Turn that
2067 // into a subtract/add of the low bit with an incremented/decremented constant:
2068 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2069 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
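// Worked instance with C = 5:
//   add (zext i1 (seteq (X & 1), 0)), 5
// X even: 1 + 5 == 6 == 6 - 0; X odd: 0 + 5 == 5 == 6 - 1, matching
// sub 6, (zext (X & 1)).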
2070 EVT VT = C.getValueType();
2071 SDLoc DL(N);
2072 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2073 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2074 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2075 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2078 /// Try to fold an add/sub with a constant operand, where the other operand is a
2079 /// shifted 'not' of the sign bit, into a shift and add with a different constant.
2080 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2081 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2082 "Expecting add or sub");
2084 // We need a constant operand for the add/sub, and the other operand is a
2085 // logical shift right: add (srl), C or sub C, (srl).
2086 bool IsAdd = N->getOpcode() == ISD::ADD;
2087 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2088 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2089 ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2090 if (!C || ShiftOp.getOpcode() != ISD::SRL)
2091 return SDValue();
2093 // The shift must be of a 'not' value.
2094 SDValue Not = ShiftOp.getOperand(0);
2095 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2096 return SDValue();
2098 // The shift must be moving the sign bit to the least-significant-bit.
2099 EVT VT = ShiftOp.getValueType();
2100 SDValue ShAmt = ShiftOp.getOperand(1);
2101 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2102 if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2103 return SDValue();
2105 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2106 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2107 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
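// Spot check on i32 with C = 5, i.e. add (srl (not X), 31), 5:
//   X < 0:  (~X >>u 31) == 0, giving 5; (X >>s 31) == -1, and -1 + 6 == 5
//   X >= 0: (~X >>u 31) == 1, giving 6; (X >>s 31) ==  0, and  0 + 6 == 6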
2108 SDLoc DL(N);
2109 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2110 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2111 APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2112 return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2115 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2116 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2117 /// are no common bits set in the operands).
2118 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2119 SDValue N0 = N->getOperand(0);
2120 SDValue N1 = N->getOperand(1);
2121 EVT VT = N0.getValueType();
2122 SDLoc DL(N);
2124 // fold vector ops
2125 if (VT.isVector()) {
2126 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2127 return FoldedVOp;
2129 // fold (add x, 0) -> x, vector edition
2130 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2131 return N0;
2132 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2133 return N1;
2136 // fold (add x, undef) -> undef
2137 if (N0.isUndef())
2138 return N0;
2140 if (N1.isUndef())
2141 return N1;
2143 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2144 // canonicalize constant to RHS
2145 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2146 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2147 // fold (add c1, c2) -> c1+c2
2148 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2149 N1.getNode());
2152 // fold (add x, 0) -> x
2153 if (isNullConstant(N1))
2154 return N0;
2156 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2157 // fold ((c1-A)+c2) -> (c1+c2)-A
2158 if (N0.getOpcode() == ISD::SUB &&
2159 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2160 // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2161 return DAG.getNode(ISD::SUB, DL, VT,
2162 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2163 N0.getOperand(1));
2166 // add (sext i1 X), 1 -> zext (not i1 X)
2167 // We don't transform this pattern:
2168 // add (zext i1 X), -1 -> sext (not i1 X)
2169 // because most (?) targets generate better code for the zext form.
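// Both forms agree on the two i1 inputs:
//   X == 0: sext == 0,  and 0 + 1 == 1 == zext (not 0)
//   X == 1: sext == -1, and -1 + 1 == 0 == zext (not 1)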
2170 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2171 isOneOrOneSplat(N1)) {
2172 SDValue X = N0.getOperand(0);
2173 if ((!LegalOperations ||
2174 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2175 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2176 X.getScalarValueSizeInBits() == 1) {
2177 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2178 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2182 // Undo the add -> or combine to merge constant offsets from a frame index.
2183 if (N0.getOpcode() == ISD::OR &&
2184 isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2185 isa<ConstantSDNode>(N0.getOperand(1)) &&
2186 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2187 SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2188 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2192 if (SDValue NewSel = foldBinOpIntoSelect(N))
2193 return NewSel;
2195 // reassociate add
2196 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2197 return RADD;
2199 // fold ((0-A) + B) -> B-A
2200 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2201 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2203 // fold (A + (0-B)) -> A-B
2204 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2205 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2207 // fold (A+(B-A)) -> B
2208 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2209 return N1.getOperand(0);
2211 // fold ((B-A)+A) -> B
2212 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2213 return N0.getOperand(0);
2215 // fold ((A-B)+(C-A)) -> (C-B)
2216 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2217 N0.getOperand(0) == N1.getOperand(1))
2218 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2219 N0.getOperand(1));
2221 // fold ((A-B)+(B-C)) -> (A-C)
2222 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2223 N0.getOperand(1) == N1.getOperand(0))
2224 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2225 N1.getOperand(1));
2227 // fold (A+(B-(A+C))) to (B-C)
2228 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2229 N0 == N1.getOperand(1).getOperand(0))
2230 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2231 N1.getOperand(1).getOperand(1));
2233 // fold (A+(B-(C+A))) to (B-C)
2234 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2235 N0 == N1.getOperand(1).getOperand(1))
2236 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2237 N1.getOperand(1).getOperand(0));
2239 // fold (A+((B-A)+or-C)) to (B+or-C)
2240 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2241 N1.getOperand(0).getOpcode() == ISD::SUB &&
2242 N0 == N1.getOperand(0).getOperand(1))
2243 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2244 N1.getOperand(1));
2246 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2247 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2248 SDValue N00 = N0.getOperand(0);
2249 SDValue N01 = N0.getOperand(1);
2250 SDValue N10 = N1.getOperand(0);
2251 SDValue N11 = N1.getOperand(1);
2253 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2254 return DAG.getNode(ISD::SUB, DL, VT,
2255 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2256 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2259 // fold (add (umax X, C), -C) --> (usubsat X, C)
2260 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2261 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2262 return (!Max && !Op) ||
2263 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2265 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2266 /*AllowUndefs*/ true))
2267 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2268 N0.getOperand(1));
2271 if (SimplifyDemandedBits(SDValue(N, 0)))
2272 return SDValue(N, 0);
2274 if (isOneOrOneSplat(N1)) {
2275 // fold (add (xor a, -1), 1) -> (sub 0, a)
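// (This is the two's-complement identity ~a + 1 == -a.)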
2276 if (isBitwiseNot(N0))
2277 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2278 N0.getOperand(0));
2280 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2281 if (N0.getOpcode() == ISD::ADD ||
2282 N0.getOpcode() == ISD::UADDO ||
2283 N0.getOpcode() == ISD::SADDO) {
2284 SDValue A, Xor;
2286 if (isBitwiseNot(N0.getOperand(0))) {
2287 A = N0.getOperand(1);
2288 Xor = N0.getOperand(0);
2289 } else if (isBitwiseNot(N0.getOperand(1))) {
2290 A = N0.getOperand(0);
2291 Xor = N0.getOperand(1);
2294 if (Xor)
2295 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2299 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2300 return Combined;
2302 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2303 return Combined;
2305 return SDValue();
2308 SDValue DAGCombiner::visitADD(SDNode *N) {
2309 SDValue N0 = N->getOperand(0);
2310 SDValue N1 = N->getOperand(1);
2311 EVT VT = N0.getValueType();
2312 SDLoc DL(N);
2314 if (SDValue Combined = visitADDLike(N))
2315 return Combined;
2317 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2318 return V;
2320 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2321 return V;
2323 // fold (a+b) -> (a|b) iff a and b share no bits.
2324 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2325 DAG.haveNoCommonBitsSet(N0, N1))
2326 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2328 return SDValue();
2331 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2332 unsigned Opcode = N->getOpcode();
2333 SDValue N0 = N->getOperand(0);
2334 SDValue N1 = N->getOperand(1);
2335 EVT VT = N0.getValueType();
2336 SDLoc DL(N);
2338 // fold vector ops
2339 if (VT.isVector()) {
2340 // TODO SimplifyVBinOp
2342 // fold (add_sat x, 0) -> x, vector edition
2343 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2344 return N0;
2345 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2346 return N1;
2349 // fold (add_sat x, undef) -> -1
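// (undef may always be chosen so that the result is all-ones, e.g. UINT_MAX
// for uaddsat or -1 - x for saddsat, so folding to -1 is legal.)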
2350 if (N0.isUndef() || N1.isUndef())
2351 return DAG.getAllOnesConstant(DL, VT);
2353 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2354 // canonicalize constant to RHS
2355 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2356 return DAG.getNode(Opcode, DL, VT, N1, N0);
2357 // fold (add_sat c1, c2) -> c3
2358 return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2359 N1.getNode());
2362 // fold (add_sat x, 0) -> x
2363 if (isNullConstant(N1))
2364 return N0;
2366 // If it cannot overflow, transform into an add.
2367 if (Opcode == ISD::UADDSAT)
2368 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2369 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2371 return SDValue();
2374 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2375 bool Masked = false;
2377 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2378 while (true) {
2379 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2380 V = V.getOperand(0);
2381 continue;
2384 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2385 Masked = true;
2386 V = V.getOperand(0);
2387 continue;
2390 break;
2393 // If this is not a carry, return.
2394 if (V.getResNo() != 1)
2395 return SDValue();
2397 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2398 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2399 return SDValue();
2401 EVT VT = V.getNode()->getValueType(0);
2402 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2403 return SDValue();
2405 // If the result is masked, then no matter what kind of bool it is we can
2406 // return. If it isn't, then we need to make sure the bool type is either 0 or
2407 // 1 and not other values.
2408 if (Masked ||
2409 TLI.getBooleanContents(V.getValueType()) ==
2410 TargetLoweringBase::ZeroOrOneBooleanContent)
2411 return V;
2413 return SDValue();
2416 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2417 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2418 /// the opcode and bypass the mask operation.
2419 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2420 SelectionDAG &DAG, const SDLoc &DL) {
2421 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2422 return SDValue();
2424 EVT VT = N0.getValueType();
2425 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2426 return SDValue();
2428 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2429 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
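// Validity: the sign-bit guard above means X is all sign bits, i.e. 0 or -1,
// so (X & 1) is 0 or 1 and N0 + (X & 1) == N0 - X in both cases.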
2430 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2433 /// Helper for doing combines based on N0 and N1 being added to each other.
2434 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2435 SDNode *LocReference) {
2436 EVT VT = N0.getValueType();
2437 SDLoc DL(LocReference);
2439 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2440 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2441 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2442 return DAG.getNode(ISD::SUB, DL, VT, N0,
2443 DAG.getNode(ISD::SHL, DL, VT,
2444 N1.getOperand(0).getOperand(1),
2445 N1.getOperand(1)));
2447 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2448 return V;
2450 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2451 // rather than 'add 0/-1' (the zext should get folded).
2452 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2453 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2454 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2455 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2456 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2457 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2460 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
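// (sext_inreg i1 Y is 0 or -1, i.e. -(Y & 1), so X + sext_inreg(Y)
// equals X - (Y & 1).)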
2461 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2462 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2463 if (TN->getVT() == MVT::i1) {
2464 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2465 DAG.getConstant(1, DL, VT));
2466 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2470 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2471 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2472 N1.getResNo() == 0)
2473 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2474 N0, N1.getOperand(0), N1.getOperand(2));
2476 // (add X, Carry) -> (addcarry X, 0, Carry)
2477 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2478 if (SDValue Carry = getAsCarry(TLI, N1))
2479 return DAG.getNode(ISD::ADDCARRY, DL,
2480 DAG.getVTList(VT, Carry.getValueType()), N0,
2481 DAG.getConstant(0, DL, VT), Carry);
2483 return SDValue();
2486 SDValue DAGCombiner::visitADDC(SDNode *N) {
2487 SDValue N0 = N->getOperand(0);
2488 SDValue N1 = N->getOperand(1);
2489 EVT VT = N0.getValueType();
2490 SDLoc DL(N);
2492 // If the flag result is dead, turn this into an ADD.
2493 if (!N->hasAnyUseOfValue(1))
2494 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2495 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2497 // canonicalize constant to RHS.
2498 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2499 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2500 if (N0C && !N1C)
2501 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2503 // fold (addc x, 0) -> x + no carry out
2504 if (isNullConstant(N1))
2505 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2506 DL, MVT::Glue));
2508 // If it cannot overflow, transform into an add.
2509 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2510 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2511 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2513 return SDValue();
2516 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2517 SelectionDAG &DAG, const TargetLowering &TLI) {
2518 EVT VT = V.getValueType();
2520 SDValue Cst;
2521 switch (TLI.getBooleanContents(VT)) {
2522 case TargetLowering::ZeroOrOneBooleanContent:
2523 case TargetLowering::UndefinedBooleanContent:
2524 Cst = DAG.getConstant(1, DL, VT);
2525 break;
2526 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2527 Cst = DAG.getConstant(-1, DL, VT);
2528 break;
2531 return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2534 static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
2535 if (V.getOpcode() != ISD::XOR)
2536 return SDValue();
2538 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2539 if (!Const)
2540 return SDValue();
2542 EVT VT = V.getValueType();
2544 bool IsFlip = false;
2545 switch(TLI.getBooleanContents(VT)) {
2546 case TargetLowering::ZeroOrOneBooleanContent:
2547 IsFlip = Const->isOne();
2548 break;
2549 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2550 IsFlip = Const->isAllOnesValue();
2551 break;
2552 case TargetLowering::UndefinedBooleanContent:
2553 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2554 break;
2557 if (IsFlip)
2558 return V.getOperand(0);
2559 return SDValue();
2562 SDValue DAGCombiner::visitADDO(SDNode *N) {
2563 SDValue N0 = N->getOperand(0);
2564 SDValue N1 = N->getOperand(1);
2565 EVT VT = N0.getValueType();
2566 bool IsSigned = (ISD::SADDO == N->getOpcode());
2568 EVT CarryVT = N->getValueType(1);
2569 SDLoc DL(N);
2571 // If the flag result is dead, turn this into an ADD.
2572 if (!N->hasAnyUseOfValue(1))
2573 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2574 DAG.getUNDEF(CarryVT));
2576 // canonicalize constant to RHS.
2577 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2578 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2579 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2581 // fold (addo x, 0) -> x + no carry out
2582 if (isNullOrNullSplat(N1))
2583 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2585 if (!IsSigned) {
2586 // If it cannot overflow, transform into an add.
2587 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2588 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2589 DAG.getConstant(0, DL, CarryVT));
2591 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2592 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2593 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2594 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2595 return CombineTo(N, Sub,
2596 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2599 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2600 return Combined;
2602 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2603 return Combined;
2606 return SDValue();
2609 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2610 EVT VT = N0.getValueType();
2611 if (VT.isVector())
2612 return SDValue();
2614 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2615 // If Y + 1 cannot overflow.
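// Since Y + Carry then cannot wrap, the inner addcarry produces no
// carry-out, and the one-step X + Y + Carry yields the same sum and the
// same carry-out as the two-step form.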
2616 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2617 SDValue Y = N1.getOperand(0);
2618 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2619 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2620 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2621 N1.getOperand(2));
2624 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2625 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2626 if (SDValue Carry = getAsCarry(TLI, N1))
2627 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2628 DAG.getConstant(0, SDLoc(N), VT), Carry);
2630 return SDValue();
2633 SDValue DAGCombiner::visitADDE(SDNode *N) {
2634 SDValue N0 = N->getOperand(0);
2635 SDValue N1 = N->getOperand(1);
2636 SDValue CarryIn = N->getOperand(2);
2638 // canonicalize constant to RHS
2639 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2640 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2641 if (N0C && !N1C)
2642 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2643 N1, N0, CarryIn);
2645 // fold (adde x, y, false) -> (addc x, y)
2646 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2647 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2649 return SDValue();
2652 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2653 SDValue N0 = N->getOperand(0);
2654 SDValue N1 = N->getOperand(1);
2655 SDValue CarryIn = N->getOperand(2);
2656 SDLoc DL(N);
2658 // canonicalize constant to RHS
2659 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2660 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2661 if (N0C && !N1C)
2662 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2664 // fold (addcarry x, y, false) -> (uaddo x, y)
2665 if (isNullConstant(CarryIn)) {
2666 if (!LegalOperations ||
2667 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2668 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2671 EVT CarryVT = CarryIn.getValueType();
2673 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2674 if (isNullConstant(N0) && isNullConstant(N1)) {
2675 EVT VT = N0.getValueType();
2676 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2677 AddToWorklist(CarryExt.getNode());
2678 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2679 DAG.getConstant(1, DL, VT)),
2680 DAG.getConstant(0, DL, CarryVT));
2683 // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2684 if (isBitwiseNot(N0) && isNullConstant(N1)) {
2685 if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
2686 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2687 DAG.getConstant(0, DL, N0.getValueType()),
2688 N0.getOperand(0), B);
2689 return CombineTo(N, Sub,
2690 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2694 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2695 return Combined;
2697 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2698 return Combined;
2700 return SDValue();
2703 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2704 SDNode *N) {
2705 // Iff the flag result is dead:
2706 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2707 if ((N0.getOpcode() == ISD::ADD ||
2708 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2709 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2710 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2711 N0.getOperand(0), N0.getOperand(1), CarryIn);
2713 /**
2714 * When one of the addcarry arguments is itself a carry, we may be facing
2715 * a diamond carry propagation, in which case we try to transform the DAG
2716 * to ensure linear carry propagation if that is possible.
2718 * We are trying to get:
2719 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2720 */
2721 if (auto Y = getAsCarry(TLI, N1)) {
2722 /**
2723 * (uaddo A, B)
2724 * / \
2725 * Carry Sum
2726 * | \
2727 * | (addcarry *, 0, Z)
2728 * | /
2729 * \ Carry
2730 * | /
2731 * (addcarry X, *, *)
2732 */
2733 if (Y.getOpcode() == ISD::UADDO &&
2734 CarryIn.getResNo() == 1 &&
2735 CarryIn.getOpcode() == ISD::ADDCARRY &&
2736 isNullConstant(CarryIn.getOperand(1)) &&
2737 CarryIn.getOperand(0) == Y.getValue(0)) {
2738 auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2739 Y.getOperand(0), Y.getOperand(1),
2740 CarryIn.getOperand(2));
2741 AddToWorklist(NewY.getNode());
2742 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2743 DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2744 NewY.getValue(1));
2748 return SDValue();
2751 // Since it may not be valid to emit a fold to zero for vector initializers,
2752 // check if we can before folding.
2753 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2754 SelectionDAG &DAG, bool LegalOperations) {
2755 if (!VT.isVector())
2756 return DAG.getConstant(0, DL, VT);
2757 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2758 return DAG.getConstant(0, DL, VT);
2759 return SDValue();
2762 SDValue DAGCombiner::visitSUB(SDNode *N) {
2763 SDValue N0 = N->getOperand(0);
2764 SDValue N1 = N->getOperand(1);
2765 EVT VT = N0.getValueType();
2766 SDLoc DL(N);
2768 // fold vector ops
2769 if (VT.isVector()) {
2770 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2771 return FoldedVOp;
2773 // fold (sub x, 0) -> x, vector edition
2774 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2775 return N0;
2778 // fold (sub x, x) -> 0
2779 // FIXME: Refactor this and xor and other similar operations together.
2780 if (N0 == N1)
2781 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2782 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2783 DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2784 // fold (sub c1, c2) -> c1-c2
2785 return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2786 N1.getNode());
2789 if (SDValue NewSel = foldBinOpIntoSelect(N))
2790 return NewSel;
2792 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2794 // fold (sub x, c) -> (add x, -c)
2795 if (N1C) {
2796 return DAG.getNode(ISD::ADD, DL, VT, N0,
2797 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2800 if (isNullOrNullSplat(N0)) {
2801 unsigned BitWidth = VT.getScalarSizeInBits();
2802 // Right-shifting everything out but the sign bit followed by negation is
2803 // the same as flipping arithmetic/logical shift type without the negation:
2804 // -(X >>u 31) -> (X >>s 31)
2805 // -(X >>s 31) -> (X >>u 31)
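// Spot check on i32: for X < 0, 0 - (X >>u 31) == -1 == X >>s 31 and
// 0 - (X >>s 31) == 1 == X >>u 31; for X >= 0 both sides are 0.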
2806 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2807 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2808 if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2809 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2810 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2811 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2815 // 0 - X --> 0 if the sub is NUW.
2816 if (N->getFlags().hasNoUnsignedWrap())
2817 return N0;
2819 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2820 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2821 // N1 must be 0 because negating the minimum signed value is undefined.
2822 if (N->getFlags().hasNoSignedWrap())
2823 return N0;
2825 // 0 - X --> X if X is 0 or the minimum signed value.
2826 return N1;
2830 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2831 if (isAllOnesOrAllOnesSplat(N0))
2832 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2834 // fold (A - (0-B)) -> A+B
2835 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2836 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2838 // fold A-(A-B) -> B
2839 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2840 return N1.getOperand(1);
2842 // fold (A+B)-A -> B
2843 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2844 return N0.getOperand(1);
2846 // fold (A+B)-B -> A
2847 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2848 return N0.getOperand(0);
2850 // fold C2-(A+C1) -> (C2-C1)-A
2851 if (N1.getOpcode() == ISD::ADD) {
2852 SDValue N11 = N1.getOperand(1);
2853 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2854 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2855 SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2856 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2860 // fold ((A+(B+or-C))-B) -> A+or-C
2861 if (N0.getOpcode() == ISD::ADD &&
2862 (N0.getOperand(1).getOpcode() == ISD::SUB ||
2863 N0.getOperand(1).getOpcode() == ISD::ADD) &&
2864 N0.getOperand(1).getOperand(0) == N1)
2865 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2866 N0.getOperand(1).getOperand(1));
2868 // fold ((A+(C+B))-B) -> A+C
2869 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2870 N0.getOperand(1).getOperand(1) == N1)
2871 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2872 N0.getOperand(1).getOperand(0));
2874 // fold ((A-(B-C))-C) -> A-B
2875 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2876 N0.getOperand(1).getOperand(1) == N1)
2877 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2878 N0.getOperand(1).getOperand(0));
2880 // fold (A-(B-C)) -> A+(C-B)
2881 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2882 return DAG.getNode(ISD::ADD, DL, VT, N0,
2883 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2884 N1.getOperand(0)));
2886 // fold (X - (-Y * Z)) -> (X + (Y * Z))
2887 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2888 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2889 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2890 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2891 N1.getOperand(0).getOperand(1),
2892 N1.getOperand(1));
2893 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2895 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2896 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2897 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2898 N1.getOperand(0),
2899 N1.getOperand(1).getOperand(1));
2900 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2904 // If either operand of a sub is undef, the result is undef
2905 if (N0.isUndef())
2906 return N0;
2907 if (N1.isUndef())
2908 return N1;
2910 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2911 return V;
2913 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2914 return V;
2916 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
2917 return V;
2919 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
2920 // rather than 'sub 0/1' (the sext should get folded).
2921 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
2922 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
2923 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
2924 TLI.getBooleanContents(VT) ==
2925 TargetLowering::ZeroOrNegativeOneBooleanContent) {
2926 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
2927 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
2930 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
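// This is the classic branchless abs: Y == 0 for non-negative X and
// Y == -1 for negative X, so (X ^ Y) - Y is X when Y == 0 and
// ~X + 1 == -X when Y == -1.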
2931 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2932 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2933 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2934 SDValue S0 = N1.getOperand(0);
2935 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2936 unsigned OpSizeInBits = VT.getScalarSizeInBits();
2937 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2938 if (C->getAPIntValue() == (OpSizeInBits - 1))
2939 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2944 // If the relocation model supports it, consider symbol offsets.
2945 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2946 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2947 // fold (sub Sym, c) -> Sym-c
2948 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2949 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2950 GA->getOffset() -
2951 (uint64_t)N1C->getSExtValue());
2952 // fold (sub Sym+c1, Sym+c2) -> c1-c2
2953 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2954 if (GA->getGlobal() == GB->getGlobal())
2955 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2956 DL, VT);
2959 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2960 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2961 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2962 if (TN->getVT() == MVT::i1) {
2963 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2964 DAG.getConstant(1, DL, VT));
2965 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2969 // Prefer an add for more folding potential and possibly better codegen:
2970 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
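// (N10 >>u (w-1)) is 0 or 1 while (N10 >>s (w-1)) is 0 or -1 for the same
// input, so subtracting the former is the same as adding the latter.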
2971 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2972 SDValue ShAmt = N1.getOperand(1);
2973 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2974 if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2975 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2976 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2980 return SDValue();
2983 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2984 SDValue N0 = N->getOperand(0);
2985 SDValue N1 = N->getOperand(1);
2986 EVT VT = N0.getValueType();
2987 SDLoc DL(N);
2989 // fold vector ops
2990 if (VT.isVector()) {
2991 // TODO SimplifyVBinOp
2993 // fold (sub_sat x, 0) -> x, vector edition
2994 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2995 return N0;
2998 // fold (sub_sat x, undef) -> 0
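// (undef may be chosen equal to the other operand, making the difference 0.)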
2999 if (N0.isUndef() || N1.isUndef())
3000 return DAG.getConstant(0, DL, VT);
3002 // fold (sub_sat x, x) -> 0
3003 if (N0 == N1)
3004 return DAG.getConstant(0, DL, VT);
3006 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3007 DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3008 // fold (sub_sat c1, c2) -> c3
3009 return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3010 N1.getNode());
3013 // fold (sub_sat x, 0) -> x
3014 if (isNullConstant(N1))
3015 return N0;
3017 return SDValue();
3020 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3021 SDValue N0 = N->getOperand(0);
3022 SDValue N1 = N->getOperand(1);
3023 EVT VT = N0.getValueType();
3024 SDLoc DL(N);
3026 // If the flag result is dead, turn this into a SUB.
3027 if (!N->hasAnyUseOfValue(1))
3028 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3029 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3031 // fold (subc x, x) -> 0 + no borrow
3032 if (N0 == N1)
3033 return CombineTo(N, DAG.getConstant(0, DL, VT),
3034 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3036 // fold (subc x, 0) -> x + no borrow
3037 if (isNullConstant(N1))
3038 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3040 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3041 if (isAllOnesConstant(N0))
3042 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3043 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3045 return SDValue();
3048 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3049 SDValue N0 = N->getOperand(0);
3050 SDValue N1 = N->getOperand(1);
3051 EVT VT = N0.getValueType();
3052 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3054 EVT CarryVT = N->getValueType(1);
3055 SDLoc DL(N);
3057 // If the flag result is dead, turn this into a SUB.
3058 if (!N->hasAnyUseOfValue(1))
3059 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3060 DAG.getUNDEF(CarryVT));
3062 // fold (subo x, x) -> 0 + no borrow
3063 if (N0 == N1)
3064 return CombineTo(N, DAG.getConstant(0, DL, VT),
3065 DAG.getConstant(0, DL, CarryVT));
3067 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3069 // fold (subo x, c) -> (addo x, -c)
3070 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3071 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3072 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3075 // fold (subo x, 0) -> x + no borrow
3076 if (isNullOrNullSplat(N1))
3077 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3079 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3080 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3081 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3082 DAG.getConstant(0, DL, CarryVT));
3084 return SDValue();
3087 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3088 SDValue N0 = N->getOperand(0);
3089 SDValue N1 = N->getOperand(1);
3090 SDValue CarryIn = N->getOperand(2);
3092 // fold (sube x, y, false) -> (subc x, y)
3093 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3094 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3096 return SDValue();
3099 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3100 SDValue N0 = N->getOperand(0);
3101 SDValue N1 = N->getOperand(1);
3102 SDValue CarryIn = N->getOperand(2);
3104 // fold (subcarry x, y, false) -> (usubo x, y)
3105 if (isNullConstant(CarryIn)) {
3106 if (!LegalOperations ||
3107 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3108 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3111 return SDValue();
3114 SDValue DAGCombiner::visitMUL(SDNode *N) {
3115 SDValue N0 = N->getOperand(0);
3116 SDValue N1 = N->getOperand(1);
3117 EVT VT = N0.getValueType();
3119 // fold (mul x, undef) -> 0
3120 if (N0.isUndef() || N1.isUndef())
3121 return DAG.getConstant(0, SDLoc(N), VT);
3123 bool N0IsConst = false;
3124 bool N1IsConst = false;
3125 bool N1IsOpaqueConst = false;
3126 bool N0IsOpaqueConst = false;
3127 APInt ConstValue0, ConstValue1;
3128 // fold vector ops
3129 if (VT.isVector()) {
3130 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3131 return FoldedVOp;
3133 N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3134 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3135 assert((!N0IsConst ||
3136 ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3137 "Splat APInt should be element width");
3138 assert((!N1IsConst ||
3139 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3140 "Splat APInt should be element width");
3141 } else {
3142 N0IsConst = isa<ConstantSDNode>(N0);
3143 if (N0IsConst) {
3144 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3145 N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3147 N1IsConst = isa<ConstantSDNode>(N1);
3148 if (N1IsConst) {
3149 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3150 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3154 // fold (mul c1, c2) -> c1*c2
3155 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3156 return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3157 N0.getNode(), N1.getNode());
3159 // canonicalize constant to RHS (vector doesn't have to splat)
3160 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3161 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3162 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3163 // fold (mul x, 0) -> 0
3164 if (N1IsConst && ConstValue1.isNullValue())
3165 return N1;
3166 // fold (mul x, 1) -> x
3167 if (N1IsConst && ConstValue1.isOneValue())
3168 return N0;
3170 if (SDValue NewSel = foldBinOpIntoSelect(N))
3171 return NewSel;
3173 // fold (mul x, -1) -> 0-x
3174 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3175 SDLoc DL(N);
3176 return DAG.getNode(ISD::SUB, DL, VT,
3177 DAG.getConstant(0, DL, VT), N0);
3179 // fold (mul x, (1 << c)) -> x << c
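// e.g. (mul x, 16) --> (shl x, 4)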
3180 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3181 DAG.isKnownToBeAPowerOfTwo(N1) &&
3182 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3183 SDLoc DL(N);
3184 SDValue LogBase2 = BuildLogBase2(N1, DL);
3185 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3186 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3187 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3189 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3190 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3191 unsigned Log2Val = (-ConstValue1).logBase2();
3192 SDLoc DL(N);
3193 // FIXME: If the input is something that is easily negated (e.g. a
3194 // single-use add), we should put the negate there.
3195 return DAG.getNode(ISD::SUB, DL, VT,
3196 DAG.getConstant(0, DL, VT),
3197 DAG.getNode(ISD::SHL, DL, VT, N0,
3198 DAG.getConstant(Log2Val, DL,
3199 getShiftAmountTy(N0.getValueType()))));
3202 // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3203 // mul x, (2^N + 1) --> add (shl x, N), x
3204 // mul x, (2^N - 1) --> sub (shl x, N), x
3205 // Examples: x * 33 --> (x << 5) + x
3206 // x * 15 --> (x << 4) - x
3207 // x * -33 --> -((x << 5) + x)
3208 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3209 if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3210 // TODO: We could handle more general decomposition of any constant by
3211 // having the target set a limit on number of ops and making a
3212 // callback to determine that sequence (similar to sqrt expansion).
3213 unsigned MathOp = ISD::DELETED_NODE;
3214 APInt MulC = ConstValue1.abs();
3215 if ((MulC - 1).isPowerOf2())
3216 MathOp = ISD::ADD;
3217 else if ((MulC + 1).isPowerOf2())
3218 MathOp = ISD::SUB;
3220 if (MathOp != ISD::DELETED_NODE) {
3221 unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3222 : (MulC + 1).logBase2();
3223 assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3224 "Not expecting multiply-by-constant that could have simplified");
3225 SDLoc DL(N);
3226 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3227 DAG.getConstant(ShAmt, DL, VT));
3228 SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3229 if (ConstValue1.isNegative())
3230 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3231 return R;
3235 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3236 if (N0.getOpcode() == ISD::SHL &&
3237 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3238 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3239 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3240 if (isConstantOrConstantVector(C3))
3241 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3244 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3245 // use.
3247 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3249 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3250 if (N0.getOpcode() == ISD::SHL &&
3251 isConstantOrConstantVector(N0.getOperand(1)) &&
3252 N0.getNode()->hasOneUse()) {
3253 Sh = N0; Y = N1;
3254 } else if (N1.getOpcode() == ISD::SHL &&
3255 isConstantOrConstantVector(N1.getOperand(1)) &&
3256 N1.getNode()->hasOneUse()) {
3257 Sh = N1; Y = N0;
3260 if (Sh.getNode()) {
3261 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3262 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3266 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3267 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3268 N0.getOpcode() == ISD::ADD &&
3269 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3270 isMulAddWithConstProfitable(N, N0, N1))
3271 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3272 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3273 N0.getOperand(0), N1),
3274 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3275 N0.getOperand(1), N1));
3277 // reassociate mul
3278 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3279 return RMUL;
3281 return SDValue();
3284 /// Return true if a divmod libcall is available for the node's type.
3285 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3286 const TargetLowering &TLI) {
3287 RTLIB::Libcall LC;
3288 EVT NodeType = Node->getValueType(0);
3289 if (!NodeType.isSimple())
3290 return false;
3291 switch (NodeType.getSimpleVT().SimpleTy) {
3292 default: return false; // No libcall for vector types.
3293 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3294 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3295 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3296 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3297 case MVT::i128: LC = isSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break;
3300 return TLI.getLibcallName(LC) != nullptr;
3303 /// Issue divrem if both quotient and remainder are needed.
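/// For example, if a function computes both X / Y (SDIV) and X % Y (SREM) of
/// the same operands, both nodes can be replaced by a single SDIVREM.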
3304 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3305 if (Node->use_empty())
3306 return SDValue(); // This is a dead node, leave it alone.
3308 unsigned Opcode = Node->getOpcode();
3309 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3310 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3312 // DivRem libcalls can still work on non-legal types.
3313 EVT VT = Node->getValueType(0);
3314 if (VT.isVector() || !VT.isInteger())
3315 return SDValue();
3317 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3318 return SDValue();
3320 // If DIVREM is going to get expanded into a libcall,
3321 // but there is no libcall available, then don't combine.
3322 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3323 !isDivRemLibcallAvailable(Node, isSigned, TLI))
3324 return SDValue();
3326 // If div is legal, it's better to do the normal expansion
3327 unsigned OtherOpcode = 0;
3328 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3329 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3330 if (TLI.isOperationLegalOrCustom(Opcode, VT))
3331 return SDValue();
3332 } else {
3333 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3334 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3335 return SDValue();
3338 SDValue Op0 = Node->getOperand(0);
3339 SDValue Op1 = Node->getOperand(1);
3340 SDValue combined;
3341 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3342 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3343 SDNode *User = *UI;
3344 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3345 User->use_empty())
3346 continue;
3347 // Convert the other matching node(s), too;
3348 // otherwise, the DIVREM may get target-legalized into something
3349 // target-specific that we won't be able to recognize.
3350 unsigned UserOpc = User->getOpcode();
3351 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3352 User->getOperand(0) == Op0 &&
3353 User->getOperand(1) == Op1) {
3354 if (!combined) {
3355 if (UserOpc == OtherOpcode) {
3356 SDVTList VTs = DAG.getVTList(VT, VT);
3357 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3358 } else if (UserOpc == DivRemOpc) {
3359 combined = SDValue(User, 0);
3360 } else {
3361 assert(UserOpc == Opcode);
3362 continue;
3365 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3366 CombineTo(User, combined);
3367 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3368 CombineTo(User, combined.getValue(1));
3371 return combined;
3374 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3375 SDValue N0 = N->getOperand(0);
3376 SDValue N1 = N->getOperand(1);
3377 EVT VT = N->getValueType(0);
3378 SDLoc DL(N);
3380 unsigned Opc = N->getOpcode();
3381 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3382 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3384 // X / undef -> undef
3385 // X % undef -> undef
3386 // X / 0 -> undef
3387 // X % 0 -> undef
3388 // NOTE: This includes vectors where any divisor element is zero/undef.
3389 if (DAG.isUndef(Opc, {N0, N1}))
3390 return DAG.getUNDEF(VT);
3392 // undef / X -> 0
3393 // undef % X -> 0
3394 if (N0.isUndef())
3395 return DAG.getConstant(0, DL, VT);
3397 // 0 / X -> 0
3398 // 0 % X -> 0
3399 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3400 if (N0C && N0C->isNullValue())
3401 return N0;
3403 // X / X -> 1
3404 // X % X -> 0
3405 if (N0 == N1)
3406 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3408 // X / 1 -> X
3409 // X % 1 -> 0
3410 // If this is a boolean op (single-bit element type), we can't have
3411 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3412 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3413 // it's a 1.
3414 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3415 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3417 return SDValue();
3420 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3421 SDValue N0 = N->getOperand(0);
3422 SDValue N1 = N->getOperand(1);
3423 EVT VT = N->getValueType(0);
3424 EVT CCVT = getSetCCResultType(VT);
3426 // fold vector ops
3427 if (VT.isVector())
3428 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3429 return FoldedVOp;
3431 SDLoc DL(N);
3433 // fold (sdiv c1, c2) -> c1/c2
3434 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3435 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3436 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3437 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3438 // fold (sdiv X, -1) -> 0-X
3439 if (N1C && N1C->isAllOnesValue())
3440 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3441 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3442 if (N1C && N1C->getAPIntValue().isMinSignedValue())
3443 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3444 DAG.getConstant(1, DL, VT),
3445 DAG.getConstant(0, DL, VT));
3447 if (SDValue V = simplifyDivRem(N, DAG))
3448 return V;
3450 if (SDValue NewSel = foldBinOpIntoSelect(N))
3451 return NewSel;
3453 // If we know the sign bits of both operands are zero, strength reduce to a
3454 // udiv instead. Handles (X & 15) /s 4 -> (X & 15) >> 2
3455 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3456 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3458 if (SDValue V = visitSDIVLike(N0, N1, N)) {
3459 // If the corresponding remainder node exists, update its users with
3460 // (Dividend - (Quotient * Divisor)).
3461 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3462 { N0, N1 })) {
3463 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3464 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3465 AddToWorklist(Mul.getNode());
3466 AddToWorklist(Sub.getNode());
3467 CombineTo(RemNode, Sub);
3469 return V;
3472 // sdiv, srem -> sdivrem
3473 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3474 // true. Otherwise, we break the simplification logic in visitREM().
3475 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3476 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3477 if (SDValue DivRem = useDivRem(N))
3478 return DivRem;
3480 return SDValue();
3483 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3484 SDLoc DL(N);
3485 EVT VT = N->getValueType(0);
3486 EVT CCVT = getSetCCResultType(VT);
3487 unsigned BitWidth = VT.getScalarSizeInBits();
3489 // Helper for determining whether a value is a power-of-2 constant scalar,
3490 // the negation of one, or a vector of such elements.
3491 auto IsPowerOfTwo = [](ConstantSDNode *C) {
3492 if (C->isNullValue() || C->isOpaque())
3493 return false;
3494 if (C->getAPIntValue().isPowerOf2())
3495 return true;
3496 if ((-C->getAPIntValue()).isPowerOf2())
3497 return true;
3498 return false;
3501 // fold (sdiv X, pow2) -> simple ops after legalize
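// e.g. for i32: (sdiv x, 8) --> (sra (add x, (srl (sra x, 31), 29)), 3),
// where the add rounds negative dividends toward zero before the shift.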
3502 // FIXME: We check for the exact bit here because the generic lowering gives
3503 // better results in that case. The target-specific lowering should learn how
3504 // to handle exact sdivs efficiently.
3505 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3506 // Target-specific implementation of sdiv x, pow2.
3507 if (SDValue Res = BuildSDIVPow2(N))
3508 return Res;
3510 // Create constants that are functions of the shift amount value.
3511 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3512 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3513 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3514 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3515 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3516 if (!isConstantOrConstantVector(Inexact))
3517 return SDValue();
3519 // Splat the sign bit into the register
3520 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3521 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3522 AddToWorklist(Sign.getNode());
3524 // Add (N0 < 0) ? pow2 - 1 : 0 so the arithmetic shift below rounds toward zero.
3525 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3526 AddToWorklist(Srl.getNode());
3527 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3528 AddToWorklist(Add.getNode());
3529 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3530 AddToWorklist(Sra.getNode());
3532 // Special case: (sdiv X, 1) -> X
3533 // Special case: (sdiv X, -1) -> 0-X
3534 SDValue One = DAG.getConstant(1, DL, VT);
3535 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3536 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3537 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3538 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3539 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3541 // If dividing by a positive value, we're done. Otherwise, the result must
3542 // be negated.
3543 SDValue Zero = DAG.getConstant(0, DL, VT);
3544 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3546 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3547 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3548 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3549 return Res;
3552 // If integer divide is expensive and we satisfy the requirements, emit an
3553 // alternate sequence. Targets may check function attributes for size/speed
3554 // trade-offs.
3555 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3556 if (isConstantOrConstantVector(N1) &&
3557 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3558 if (SDValue Op = BuildSDIV(N))
3559 return Op;
3561 return SDValue();
3564 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3565 SDValue N0 = N->getOperand(0);
3566 SDValue N1 = N->getOperand(1);
3567 EVT VT = N->getValueType(0);
3568 EVT CCVT = getSetCCResultType(VT);
3570 // fold vector ops
3571 if (VT.isVector())
3572 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3573 return FoldedVOp;
3575 SDLoc DL(N);
3577 // fold (udiv c1, c2) -> c1/c2
3578 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3579 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3580 if (N0C && N1C)
3581 if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3582 N0C, N1C))
3583 return Folded;
3584 // fold (udiv X, -1) -> select(X == -1, 1, 0)
3585 if (N1C && N1C->getAPIntValue().isAllOnesValue())
3586 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3587 DAG.getConstant(1, DL, VT),
3588 DAG.getConstant(0, DL, VT));
3590 if (SDValue V = simplifyDivRem(N, DAG))
3591 return V;
3593 if (SDValue NewSel = foldBinOpIntoSelect(N))
3594 return NewSel;
3596 if (SDValue V = visitUDIVLike(N0, N1, N)) {
3597 // If the corresponding remainder node exists, update its users with
3598 // (Dividend - (Quotient * Divisor)).
3599 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3600 { N0, N1 })) {
3601 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3602 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3603 AddToWorklist(Mul.getNode());
3604 AddToWorklist(Sub.getNode());
3605 CombineTo(RemNode, Sub);
3607 return V;
3610 // udiv, urem -> udivrem
3611 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3612 // true. Otherwise, we break the simplification logic in visitREM().
3613 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3614 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3615 if (SDValue DivRem = useDivRem(N))
3616 return DivRem;
3618 return SDValue();
3621 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3622 SDLoc DL(N);
3623 EVT VT = N->getValueType(0);
3625 // fold (udiv x, (1 << c)) -> x >>u c
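// e.g. (udiv x, 8) --> (srl x, 3)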
3626 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3627 DAG.isKnownToBeAPowerOfTwo(N1)) {
3628 SDValue LogBase2 = BuildLogBase2(N1, DL);
3629 AddToWorklist(LogBase2.getNode());
3631 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3632 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3633 AddToWorklist(Trunc.getNode());
3634 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3637 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
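// e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2))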
3638 if (N1.getOpcode() == ISD::SHL) {
3639 SDValue N10 = N1.getOperand(0);
3640 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3641 DAG.isKnownToBeAPowerOfTwo(N10)) {
3642 SDValue LogBase2 = BuildLogBase2(N10, DL);
3643 AddToWorklist(LogBase2.getNode());
3645 EVT ADDVT = N1.getOperand(1).getValueType();
3646 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3647 AddToWorklist(Trunc.getNode());
3648 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3649 AddToWorklist(Add.getNode());
3650 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3654 // fold (udiv x, c) -> alternate
3655 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3656 if (isConstantOrConstantVector(N1) &&
3657 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3658 if (SDValue Op = BuildUDIV(N))
3659 return Op;
3661 return SDValue();
3664 // Handles ISD::SREM and ISD::UREM.
3665 SDValue DAGCombiner::visitREM(SDNode *N) {
3666 unsigned Opcode = N->getOpcode();
3667 SDValue N0 = N->getOperand(0);
3668 SDValue N1 = N->getOperand(1);
3669 EVT VT = N->getValueType(0);
3670 EVT CCVT = getSetCCResultType(VT);
3672 bool isSigned = (Opcode == ISD::SREM);
3673 SDLoc DL(N);
3675 // fold (rem c1, c2) -> c1%c2
3676 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3677 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3678 if (N0C && N1C)
3679 if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3680 return Folded;
3681 // fold (urem X, -1) -> select(X == -1, 0, X)
3682 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3683 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3684 DAG.getConstant(0, DL, VT), N0);
3686 if (SDValue V = simplifyDivRem(N, DAG))
3687 return V;
3689 if (SDValue NewSel = foldBinOpIntoSelect(N))
3690 return NewSel;
3692 if (isSigned) {
3693 // If we know the sign bits of both operands are zero, strength reduce to a
3694 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X & 15
3695 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3696 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3697 } else {
3698 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3699 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3700 // fold (urem x, pow2) -> (and x, pow2-1)
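// e.g. (urem x, 16) --> (and x, 15)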
3701 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3702 AddToWorklist(Add.getNode());
3703 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3705 if (N1.getOpcode() == ISD::SHL &&
3706 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3707 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3708 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3709 AddToWorklist(Add.getNode());
3710 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3714 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3716 // If X/C can be simplified by the division-by-constant logic, lower
3717 // X%C to the equivalent of X-X/C*C.
3718 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3719 // speculative DIV must not cause a DIVREM conversion. We guard against this
3720 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3721 // combine will not return a DIVREM. Regardless, checking cheapness here
3722 // makes sense since the simplification results in fatter code.
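// For example, X % 7 becomes X - (X / 7) * 7, where X / 7 is produced by the
// cheaper multiply-based expansion from visitSDIVLike/visitUDIVLike.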
3723 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3724 SDValue OptimizedDiv =
3725 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3726 if (OptimizedDiv.getNode()) {
3727 // If the equivalent Div node also exists, update its users.
3728 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3729 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3730 { N0, N1 }))
3731 CombineTo(DivNode, OptimizedDiv);
3732 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3733 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3734 AddToWorklist(OptimizedDiv.getNode());
3735 AddToWorklist(Mul.getNode());
3736 return Sub;
3740 // sdiv, srem -> sdivrem
3741 if (SDValue DivRem = useDivRem(N))
3742 return DivRem.getValue(1);
3744 return SDValue();
3747 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3748 SDValue N0 = N->getOperand(0);
3749 SDValue N1 = N->getOperand(1);
3750 EVT VT = N->getValueType(0);
3751 SDLoc DL(N);
3753 if (VT.isVector()) {
3754 // fold (mulhs x, 0) -> 0
3755 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3756 return N1;
3757 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3758 return N0;
3761 // fold (mulhs x, 0) -> 0
3762 if (isNullConstant(N1))
3763 return N1;
3764 // fold (mulhs x, 1) -> (sra x, size(x)-1)
3765 if (isOneConstant(N1))
3766 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3767 DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3768 getShiftAmountTy(N0.getValueType())));
3770 // fold (mulhs x, undef) -> 0
3771 if (N0.isUndef() || N1.isUndef())
3772 return DAG.getConstant(0, DL, VT);
3774 // If the type twice as wide is legal, transform the mulhs to a wider multiply
3775 // plus a shift.
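// e.g. (mulhs i16:x, i16:y) with a legal i32 MUL becomes
// (trunc (srl (mul (sext x), (sext y)), 16)).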
3776 if (VT.isSimple() && !VT.isVector()) {
3777 MVT Simple = VT.getSimpleVT();
3778 unsigned SimpleSize = Simple.getSizeInBits();
3779 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3780 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3781 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3782 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3783 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3784 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3785 DAG.getConstant(SimpleSize, DL,
3786 getShiftAmountTy(N1.getValueType())));
3787 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3791 return SDValue();
3794 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3795 SDValue N0 = N->getOperand(0);
3796 SDValue N1 = N->getOperand(1);
3797 EVT VT = N->getValueType(0);
3798 SDLoc DL(N);
3800 if (VT.isVector()) {
3801 // fold (mulhu x, 0) -> 0
3802 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3803 return N1;
3804 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3805 return N0;
3808 // fold (mulhu x, 0) -> 0
3809 if (isNullConstant(N1))
3810 return N1;
3811 // fold (mulhu x, 1) -> 0
3812 if (isOneConstant(N1))
3813 return DAG.getConstant(0, DL, N0.getValueType());
3814 // fold (mulhu x, undef) -> 0
3815 if (N0.isUndef() || N1.isUndef())
3816 return DAG.getConstant(0, DL, VT);
3818 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
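// e.g. for i32: (mulhu x, 16) --> (srl x, 28), since (x * 2^4) >> 32 == x >> 28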
3819 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3820 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3821 unsigned NumEltBits = VT.getScalarSizeInBits();
3822 SDValue LogBase2 = BuildLogBase2(N1, DL);
3823 SDValue SRLAmt = DAG.getNode(
3824 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3825 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3826 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3827 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3830 // If the type twice as wide is legal, transform the mulhu to a wider multiply
3831 // plus a shift.
3832 if (VT.isSimple() && !VT.isVector()) {
3833 MVT Simple = VT.getSimpleVT();
3834 unsigned SimpleSize = Simple.getSizeInBits();
3835 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3836 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3837 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3838 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3839 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3840 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3841 DAG.getConstant(SimpleSize, DL,
3842 getShiftAmountTy(N1.getValueType())));
3843 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3847 return SDValue();
3850 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3851 /// give the opcodes for the two computations that are being performed. Return
3852 /// true if a simplification was made.
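/// For example, if only the low result of an SMUL_LOHI is used, the node can
/// be replaced by a plain MUL of the same operands.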
3853 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3854 unsigned HiOp) {
3855 // If the high half is not needed, just compute the low half.
3856 bool HiExists = N->hasAnyUseOfValue(1);
3857 if (!HiExists && (!LegalOperations ||
3858 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3859 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3860 return CombineTo(N, Res, Res);
3863 // If the low half is not needed, just compute the high half.
3864 bool LoExists = N->hasAnyUseOfValue(0);
3865 if (!LoExists && (!LegalOperations ||
3866 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3867 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3868 return CombineTo(N, Res, Res);
3871 // If both halves are used, return as it is.
3872 if (LoExists && HiExists)
3873 return SDValue();
3875 // If the two computed results can be simplified separately, separate them.
3876 if (LoExists) {
3877 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3878 AddToWorklist(Lo.getNode());
3879 SDValue LoOpt = combine(Lo.getNode());
3880 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3881 (!LegalOperations ||
3882 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3883 return CombineTo(N, LoOpt, LoOpt);
3886 if (HiExists) {
3887 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3888 AddToWorklist(Hi.getNode());
3889 SDValue HiOpt = combine(Hi.getNode());
3890 if (HiOpt.getNode() && HiOpt != Hi &&
3891 (!LegalOperations ||
3892 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3893 return CombineTo(N, HiOpt, HiOpt);
3896 return SDValue();
3899 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3900 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3901 return Res;
3903 EVT VT = N->getValueType(0);
3904 SDLoc DL(N);
3906 // If the type twice as wide is legal, transform this to a wider multiply
3907 // plus a shift.
3908 if (VT.isSimple() && !VT.isVector()) {
3909 MVT Simple = VT.getSimpleVT();
3910 unsigned SimpleSize = Simple.getSizeInBits();
3911 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3912 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3913 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3914 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3915 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3916 // Compute the high part by shifting down the wide product.
3917 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3918 DAG.getConstant(SimpleSize, DL,
3919 getShiftAmountTy(Lo.getValueType())));
3920 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3921 // Compute the low part by truncating the wide product.
3922 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3923 return CombineTo(N, Lo, Hi);
3927 return SDValue();
3930 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3931 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3932 return Res;
3934 EVT VT = N->getValueType(0);
3935 SDLoc DL(N);
3937 // If the type twice as wide is legal, transform this to a wider multiply
3938 // plus a shift.
3939 if (VT.isSimple() && !VT.isVector()) {
3940 MVT Simple = VT.getSimpleVT();
3941 unsigned SimpleSize = Simple.getSizeInBits();
3942 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3943 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3944 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3945 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3946 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3947 // Compute the high part by shifting down the wide product.
3948 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3949 DAG.getConstant(SimpleSize, DL,
3950 getShiftAmountTy(Lo.getValueType())));
3951 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3952 // Compute the low part by truncating the wide product.
3953 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3954 return CombineTo(N, Lo, Hi);
3958 return SDValue();
3961 SDValue DAGCombiner::visitMULO(SDNode *N) {
3962 bool IsSigned = (ISD::SMULO == N->getOpcode());
3964 // (mulo x, 2) -> (addo x, x)
3965 if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
3966 if (C2->getAPIntValue() == 2)
3967 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
3968 N->getVTList(), N->getOperand(0), N->getOperand(0));
3970 return SDValue();
3973 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3974 SDValue N0 = N->getOperand(0);
3975 SDValue N1 = N->getOperand(1);
3976 EVT VT = N0.getValueType();
3978 // fold vector ops
3979 if (VT.isVector())
3980 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3981 return FoldedVOp;
3983 // fold operation with constant operands.
3984 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3985 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3986 if (N0C && N1C)
3987 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3989 // canonicalize constant to RHS
3990 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3991 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3992 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3994 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3995 // Only do this if the current op isn't legal and the flipped is.
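// e.g. (smax x, y) --> (umax x, y) when both sign bits are known zero, since
// the signed and unsigned orderings agree on non-negative values.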
3996 unsigned Opcode = N->getOpcode();
3997 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3998 if (!TLI.isOperationLegal(Opcode, VT) &&
3999 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4000 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4001 unsigned AltOpcode;
4002 switch (Opcode) {
4003 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4004 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4005 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4006 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4007 default: llvm_unreachable("Unknown MINMAX opcode");
4009 if (TLI.isOperationLegal(AltOpcode, VT))
4010 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4013 return SDValue();
4016 /// If this is a bitwise logic instruction and both operands have the same
4017 /// opcode, try to sink the other opcode after the logic instruction.
4018 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4019 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4020 EVT VT = N0.getValueType();
4021 unsigned LogicOpcode = N->getOpcode();
4022 unsigned HandOpcode = N0.getOpcode();
4023 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4024 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4025 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4027 // Bail early if none of these transforms apply.
4028 if (N0.getNumOperands() == 0)
4029 return SDValue();
4031 // FIXME: We should check number of uses of the operands to not increase
4032 // the instruction count for all transforms.
4034 // Handle size-changing casts.
4035 SDValue X = N0.getOperand(0);
4036 SDValue Y = N1.getOperand(0);
4037 EVT XVT = X.getValueType();
4038 SDLoc DL(N);
4039 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4040 HandOpcode == ISD::SIGN_EXTEND) {
4041 // If both operands have other uses, this transform would create extra
4042 // instructions without eliminating anything.
4043 if (!N0.hasOneUse() && !N1.hasOneUse())
4044 return SDValue();
4045 // We need matching integer source types.
4046 if (XVT != Y.getValueType())
4047 return SDValue();
4048 // Don't create an illegal op during or after legalization. Don't ever
4049 // create an unsupported vector op.
4050 if ((VT.isVector() || LegalOperations) &&
4051 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4052 return SDValue();
4053 // Avoid infinite looping with PromoteIntBinOp.
4054 // TODO: Should we apply desirable/legal constraints to all opcodes?
4055 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4056 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4057 return SDValue();
4058 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4059 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4060 return DAG.getNode(HandOpcode, DL, VT, Logic);
4063 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4064 if (HandOpcode == ISD::TRUNCATE) {
4065 // If both operands have other uses, this transform would create extra
4066 // instructions without eliminating anything.
4067 if (!N0.hasOneUse() && !N1.hasOneUse())
4068 return SDValue();
4069 // We need matching source types.
4070 if (XVT != Y.getValueType())
4071 return SDValue();
4072 // Don't create an illegal op during or after legalization.
4073 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4074 return SDValue();
4075 // Be extra careful sinking truncate. If it's free, there's no benefit in
4076 // widening a binop. Also, don't create a logic op on an illegal type.
4077 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4078 return SDValue();
4079 if (!TLI.isTypeLegal(XVT))
4080 return SDValue();
4081 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4082 return DAG.getNode(HandOpcode, DL, VT, Logic);
4085 // For binops SHL/SRL/SRA/AND:
4086 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4087 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4088 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4089 N0.getOperand(1) == N1.getOperand(1)) {
4090 // If either operand has other uses, this transform is not an improvement.
4091 if (!N0.hasOneUse() || !N1.hasOneUse())
4092 return SDValue();
4093 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4094 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4097 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4098 if (HandOpcode == ISD::BSWAP) {
4099 // If either operand has other uses, this transform is not an improvement.
4100 if (!N0.hasOneUse() || !N1.hasOneUse())
4101 return SDValue();
4102 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4103 return DAG.getNode(HandOpcode, DL, VT, Logic);
4106 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4107 // Only perform this optimization up until type legalization, before
4108 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4109 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4110 // we don't want to undo this promotion.
4111 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4112 // on scalars.
4113 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4114 Level <= AfterLegalizeTypes) {
4115 // Input types must be integer and the same.
4116 if (XVT.isInteger() && XVT == Y.getValueType()) {
4117 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4118 return DAG.getNode(HandOpcode, DL, VT, Logic);
4122 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4123 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4124 // If both shuffles use the same mask, and both shuffle within a single
4125 // vector, then it is worthwhile to move the swizzle after the operation.
4126 // The type-legalizer generates this pattern when loading illegal
4127 // vector types from memory. In many cases this allows additional shuffle
4128 // optimizations.
4129 // There are other cases where moving the shuffle after the xor/and/or
4130 // is profitable even if shuffles don't perform a swizzle.
4131 // If both shuffles use the same mask, and both shuffles have the same first
4132 // or second operand, then it might still be profitable to move the shuffle
4133 // after the xor/and/or operation.
4134 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4135 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4136 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4137 assert(X.getValueType() == Y.getValueType() &&
4138 "Inputs to shuffles are not the same type");
4140 // Check that both shuffles use the same mask. The masks are known to be of
4141 // the same length because the result vector type is the same.
4142 // Check also that shuffles have only one use to avoid introducing extra
4143 // instructions.
4144 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4145 !SVN0->getMask().equals(SVN1->getMask()))
4146 return SDValue();
4148 // Don't try to fold this node if it requires introducing a
4149 // build vector of all zeros that might be illegal at this stage.
4150 SDValue ShOp = N0.getOperand(1);
4151 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4152 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4154 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4155 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4156 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4157 N0.getOperand(0), N1.getOperand(0));
4158 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4161 // Don't try to fold this node if it requires introducing a
4162 // build vector of all zeros that might be illegal at this stage.
4163 ShOp = N0.getOperand(0);
4164 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4165 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4167 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4168 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4169 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4170 N1.getOperand(1));
4171 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4175 return SDValue();
4178 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4179 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4180 const SDLoc &DL) {
4181 SDValue LL, LR, RL, RR, N0CC, N1CC;
4182 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4183 !isSetCCEquivalent(N1, RL, RR, N1CC))
4184 return SDValue();
4186 assert(N0.getValueType() == N1.getValueType() &&
4187 "Unexpected operand types for bitwise logic op");
4188 assert(LL.getValueType() == LR.getValueType() &&
4189 RL.getValueType() == RR.getValueType() &&
4190 "Unexpected operand types for setcc");
4192 // If we're here post-legalization or the logic op type is not i1, the logic
4193 // op type must match a setcc result type. Also, all folds require new
4194 // operations on the left and right operands, so those types must match.
4195 EVT VT = N0.getValueType();
4196 EVT OpVT = LL.getValueType();
4197 if (LegalOperations || VT.getScalarType() != MVT::i1)
4198 if (VT != getSetCCResultType(OpVT))
4199 return SDValue();
4200 if (OpVT != RL.getValueType())
4201 return SDValue();
4203 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4204 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4205 bool IsInteger = OpVT.isInteger();
4206 if (LR == RR && CC0 == CC1 && IsInteger) {
4207 bool IsZero = isNullOrNullSplat(LR);
4208 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4210 // All bits clear?
4211 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4212 // All sign bits clear?
4213 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4214 // Any bits set?
4215 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4216 // Any sign bits set?
4217 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4219 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4220 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4221 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4222 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4223 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4224 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4225 AddToWorklist(Or.getNode());
4226 return DAG.getSetCC(DL, VT, Or, LR, CC1);
4229 // All bits set?
4230 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4231 // All sign bits set?
4232 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4233 // Any bits clear?
4234 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4235 // Any sign bits clear?
4236 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4238 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4239 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4240 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4241 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4242 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4243 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4244 AddToWorklist(And.getNode());
4245 return DAG.getSetCC(DL, VT, And, LR, CC1);
4249 // TODO: What is the 'or' equivalent of this fold?
4250 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4251 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4252 IsInteger && CC0 == ISD::SETNE &&
4253 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4254 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4255 SDValue One = DAG.getConstant(1, DL, OpVT);
4256 SDValue Two = DAG.getConstant(2, DL, OpVT);
4257 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4258 AddToWorklist(Add.getNode());
4259 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4262 // Try more general transforms if the predicates match and the only user of
4263 // the compares is the 'and' or 'or'.
4264 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4265 N0.hasOneUse() && N1.hasOneUse()) {
4266 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4267 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4268 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4269 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4270 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4271 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4272 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4273 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4276 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4277 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4278 // Match a shared variable operand and 2 non-opaque constant operands.
4279 ConstantSDNode *C0 = isConstOrConstSplat(LR);
4280 ConstantSDNode *C1 = isConstOrConstSplat(RR);
4281 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4282 // Canonicalize larger constant as C0.
4283 if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4284 std::swap(C0, C1);
4286 // The difference of the constants must be a single bit.
4287 const APInt &C0Val = C0->getAPIntValue();
4288 const APInt &C1Val = C1->getAPIntValue();
4289 if ((C0Val - C1Val).isPowerOf2()) {
4290 // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4291 // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
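// e.g. (and (setne X, 4), (setne X, 5)) --> (setne (and (add X, -4), ~1), 0)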
4292 SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4293 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4294 SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4295 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4296 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4297 return DAG.getSetCC(DL, VT, And, Zero, CC0);
4303 // Canonicalize equivalent operands to LL == RL.
4304 if (LL == RR && LR == RL) {
4305 CC1 = ISD::getSetCCSwappedOperands(CC1);
4306 std::swap(RL, RR);
4309 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4310 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4311 if (LL == RL && LR == RR) {
4312 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4313 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4314 if (NewCC != ISD::SETCC_INVALID &&
4315 (!LegalOperations ||
4316 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4317 TLI.isOperationLegal(ISD::SETCC, OpVT))))
4318 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4321 return SDValue();
4324 /// This contains all DAGCombine rules which reduce two values combined by
4325 /// an And operation to a single value. This makes them reusable in the context
4326 /// of visitSELECT(). Rules involving constants are not included as
4327 /// visitSELECT() already handles those cases.
4328 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4329 EVT VT = N1.getValueType();
4330 SDLoc DL(N);
4332 // fold (and x, undef) -> 0
4333 if (N0.isUndef() || N1.isUndef())
4334 return DAG.getConstant(0, DL, VT);
4336 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4337 return V;
4339 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4340 VT.getSizeInBits() <= 64) {
4341 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4342 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4343 // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
4344 // immediate for an add, but becomes legal once its top c2 bits are set
4345 // (the lshr operand masks those bits off anyway), transform the ADD so
4346 // the immediate doesn't need to be materialized in a register.
4347 APInt ADDC = ADDI->getAPIntValue();
4348 APInt SRLC = SRLI->getAPIntValue();
4349 if (ADDC.getMinSignedBits() <= 64 &&
4350 SRLC.ult(VT.getSizeInBits()) &&
4351 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4352 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4353 SRLC.getZExtValue());
4354 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4355 ADDC |= Mask;
4356 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4357 SDLoc DL0(N0);
4358 SDValue NewAdd =
4359 DAG.getNode(ISD::ADD, DL0, VT,
4360 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4361 CombineTo(N0.getNode(), NewAdd);
4362 // Return N so it doesn't get rechecked!
4363 return SDValue(N, 0);
4371 // Reduce bit extract of low half of an integer to the narrower type.
4372 // (and (srl i64:x, K), KMask) ->
4373 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
4374 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4375 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4376 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4377 unsigned Size = VT.getSizeInBits();
4378 const APInt &AndMask = CAnd->getAPIntValue();
4379 unsigned ShiftBits = CShift->getZExtValue();
4381 // Bail out, this node will probably disappear anyway.
4382 if (ShiftBits == 0)
4383 return SDValue();
4385 unsigned MaskBits = AndMask.countTrailingOnes();
4386 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4388 if (AndMask.isMask() &&
4389 // Required bits must not span the two halves of the integer and
4390 // must fit in the half size type.
4391 (ShiftBits + MaskBits <= Size / 2) &&
4392 TLI.isNarrowingProfitable(VT, HalfVT) &&
4393 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4394 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4395 TLI.isTruncateFree(VT, HalfVT) &&
4396 TLI.isZExtFree(HalfVT, VT)) {
4397 // The isNarrowingProfitable check is to avoid regressions on PPC and
4398 // AArch64, which match a few 64-bit bit-insert / bit-extract patterns
4399 // on downstream users of this. Those patterns could probably be
4400 // extended to handle extensions mixed in.
4402 SDLoc SL(N0);
4403 assert(MaskBits <= Size);
4405 // Extracting the highest bit of the low half.
4406 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4407 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4408 N0.getOperand(0));
4410 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4411 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4412 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4413 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4414 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4420 return SDValue();
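// For example, (and (load i32 p), 255) can be implemented as a zero-extending
// i8 load of p when such a ZEXTLOAD is legal; this predicate answers that
// legality question.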
4423 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4424 EVT LoadResultTy, EVT &ExtVT) {
4425 if (!AndC->getAPIntValue().isMask())
4426 return false;
4428 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4430 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4431 EVT LoadedVT = LoadN->getMemoryVT();
4433 if (ExtVT == LoadedVT &&
4434 (!LegalOperations ||
4435 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4436 // ZEXTLOAD will match without needing to change the size of the value being
4437 // loaded.
4438 return true;
4441 // Do not change the width of a volatile load.
4442 if (LoadN->isVolatile())
4443 return false;
4445 // Do not generate loads of non-round integer types since these can
4446 // be expensive (and would be wrong if the type is not byte sized).
4447 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4448 return false;
4450 if (LegalOperations &&
4451 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4452 return false;
4454 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4455 return false;
4457 return true;
4460 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4461 ISD::LoadExtType ExtType, EVT &MemVT,
4462 unsigned ShAmt) {
4463 if (!LDST)
4464 return false;
4465 // Only allow byte offsets.
4466 if (ShAmt % 8)
4467 return false;
4469 // Do not generate loads of non-round integer types since these can
4470 // be expensive (and would be wrong if the type is not byte sized).
4471 if (!MemVT.isRound())
4472 return false;
4474 // Don't change the width of a volatile load.
4475 if (LDST->isVolatile())
4476 return false;
4478 // Verify that we are actually reducing a load width here.
4479 if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4480 return false;
4482 // Ensure that this isn't going to produce an unsupported unaligned access.
4483 if (ShAmt &&
4484 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4485 LDST->getAddressSpace(), ShAmt / 8))
4486 return false;
4488 // It's not possible to generate a constant of extended or untyped type.
4489 EVT PtrType = LDST->getBasePtr().getValueType();
4490 if (PtrType == MVT::Untyped || PtrType.isExtended())
4491 return false;
4493 if (isa<LoadSDNode>(LDST)) {
4494 LoadSDNode *Load = cast<LoadSDNode>(LDST);
4495 // Don't transform one with multiple uses, this would require adding a new
4496 // load.
4497 if (!SDValue(Load, 0).hasOneUse())
4498 return false;
4500 if (LegalOperations &&
4501 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4502 return false;
4504 // For the transform to be legal, the load must produce only two values
4505 // (the value loaded and the chain). Don't transform a pre-increment
4506 // load, for example, which produces an extra value. Otherwise the
4507 // transformation is not equivalent, and the downstream logic to replace
4508 // uses gets things wrong.
4509 if (Load->getNumValues() > 2)
4510 return false;
4512 // If the load that we're shrinking is an extload and we're not just
4513 // discarding the extension we can't simply shrink the load. Bail.
4514 // TODO: It would be possible to merge the extensions in some cases.
4515 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4516 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4517 return false;
4519 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4520 return false;
4521 } else {
4522 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4523 StoreSDNode *Store = cast<StoreSDNode>(LDST);
4524 // Can't write outside the original store
4525 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4526 return false;
4528 if (LegalOperations &&
4529 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4530 return false;
4532 return true;
4535 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4536 SmallVectorImpl<LoadSDNode*> &Loads,
4537 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4538 ConstantSDNode *Mask,
4539 SDNode *&NodeToMask) {
4540 // Recursively search the operands, looking for loads which can be
4541 // narrowed.
4542 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4543 SDValue Op = N->getOperand(i);
4545 if (Op.getValueType().isVector())
4546 return false;
4548 // Some constants may need fixing up later if they are too large.
4549 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4550 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4551 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4552 NodesWithConsts.insert(N);
4553 continue;
4556 if (!Op.hasOneUse())
4557 return false;
4559 switch(Op.getOpcode()) {
4560 case ISD::LOAD: {
4561 auto *Load = cast<LoadSDNode>(Op);
4562 EVT ExtVT;
4563 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4564 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4566 // ZEXTLOAD is already small enough.
4567 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4568 ExtVT.bitsGE(Load->getMemoryVT()))
4569 continue;
4571 // Use LE to convert equal sized loads to zext.
4572 if (ExtVT.bitsLE(Load->getMemoryVT()))
4573 Loads.push_back(Load);
4575 continue;
4577 return false;
4579 case ISD::ZERO_EXTEND:
4580 case ISD::AssertZext: {
4581 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4582 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4583 EVT VT = Op.getOpcode() == ISD::AssertZext ?
4584 cast<VTSDNode>(Op.getOperand(1))->getVT() :
4585 Op.getOperand(0).getValueType();
4587 // We can accept extending nodes if the mask is wider than or equal in
4588 // width to the original type.
4589 if (ExtVT.bitsGE(VT))
4590 continue;
4591 break;
4593 case ISD::OR:
4594 case ISD::XOR:
4595 case ISD::AND:
4596 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4597 NodeToMask))
4598 return false;
4599 continue;
4602 // Allow one node which will be masked along with any loads found.
4603 if (NodeToMask)
4604 return false;
4606 // Also ensure that the node to be masked only produces one data result.
4607 NodeToMask = Op.getNode();
4608 if (NodeToMask->getNumValues() > 1) {
4609 bool HasValue = false;
4610 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4611 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4612 if (VT != MVT::Glue && VT != MVT::Other) {
4613 if (HasValue) {
4614 NodeToMask = nullptr;
4615 return false;
4617 HasValue = true;
4620 assert(HasValue && "Node to be masked has no data result?");
4623 return true;
4626 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4627 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4628 if (!Mask)
4629 return false;
4631 if (!Mask->getAPIntValue().isMask())
4632 return false;
4634 // No need to do anything if the and directly uses a load.
4635 if (isa<LoadSDNode>(N->getOperand(0)))
4636 return false;
4638 SmallVector<LoadSDNode*, 8> Loads;
4639 SmallPtrSet<SDNode*, 2> NodesWithConsts;
4640 SDNode *FixupNode = nullptr;
4641 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4642 if (Loads.size() == 0)
4643 return false;
4645 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4646 SDValue MaskOp = N->getOperand(1);
4648 // If it exists, fix up the single node we allow in the tree that needs
4649 // masking.
4650 if (FixupNode) {
4651 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4652 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4653 FixupNode->getValueType(0),
4654 SDValue(FixupNode, 0), MaskOp);
4655 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4656 if (And.getOpcode() == ISD::AND)
4657 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4660 // Narrow any constants that need it.
4661 for (auto *LogicN : NodesWithConsts) {
4662 SDValue Op0 = LogicN->getOperand(0);
4663 SDValue Op1 = LogicN->getOperand(1);
4665 if (isa<ConstantSDNode>(Op0))
4666 std::swap(Op0, Op1);
4668 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4669 Op1, MaskOp);
4671 DAG.UpdateNodeOperands(LogicN, Op0, And);
4674 // Create narrow loads.
4675 for (auto *Load : Loads) {
4676 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4677 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4678 SDValue(Load, 0), MaskOp);
4679 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4680 if (And.getOpcode() == ISD::AND)
4681 And = SDValue(
4682 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4683 SDValue NewLoad = ReduceLoadWidth(And.getNode());
4684 assert(NewLoad &&
4685 "Shouldn't be masking the load if it can't be narrowed");
4686 CombineTo(Load, NewLoad, NewLoad.getValue(1));
4688 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4689 return true;
4691 return false;
4694 // Unfold
4695 // x & (-1 'logical shift' y)
4696 // To
4697 // (x 'opposite logical shift' y) 'logical shift' y
4698 // if it is better for performance.
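// For example, on i32 with a variable shift amount y (illustrative, assuming
// the target prefers the shift pair):
//   x & (-1 << y)  -->  (x >> y) << y   // clear the y lowest bits
//   x & (-1 >> y)  -->  (x << y) >> y   // clear the y highest bits
// Both forms avoid materializing the variable all-ones shift as a mask.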
4699 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4700 assert(N->getOpcode() == ISD::AND);
4702 SDValue N0 = N->getOperand(0);
4703 SDValue N1 = N->getOperand(1);
4705 // Do we actually prefer shifts over mask?
4706 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
4707 return SDValue();
4709 // Try to match (-1 '[outer] logical shift' y)
4710 unsigned OuterShift;
4711 unsigned InnerShift; // The opposite direction to the OuterShift.
4712 SDValue Y; // Shift amount.
4713 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4714 if (!M.hasOneUse())
4715 return false;
4716 OuterShift = M->getOpcode();
4717 if (OuterShift == ISD::SHL)
4718 InnerShift = ISD::SRL;
4719 else if (OuterShift == ISD::SRL)
4720 InnerShift = ISD::SHL;
4721 else
4722 return false;
4723 if (!isAllOnesConstant(M->getOperand(0)))
4724 return false;
4725 Y = M->getOperand(1);
4726 return true;
4729 SDValue X;
4730 if (matchMask(N1))
4731 X = N0;
4732 else if (matchMask(N0))
4733 X = N1;
4734 else
4735 return SDValue();
4737 SDLoc DL(N);
4738 EVT VT = N->getValueType(0);
4740 // tmp = x 'opposite logical shift' y
4741 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4742 // ret = tmp 'logical shift' y
4743 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4745 return T1;
4748 SDValue DAGCombiner::visitAND(SDNode *N) {
4749 SDValue N0 = N->getOperand(0);
4750 SDValue N1 = N->getOperand(1);
4751 EVT VT = N1.getValueType();
4753 // x & x --> x
4754 if (N0 == N1)
4755 return N0;
4757 // fold vector ops
4758 if (VT.isVector()) {
4759 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4760 return FoldedVOp;
4762 // fold (and x, 0) -> 0, vector edition
4763 if (ISD::isBuildVectorAllZeros(N0.getNode()))
4764 // do not return N0, because an undef node may exist in N0
4765 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4766 SDLoc(N), N0.getValueType());
4767 if (ISD::isBuildVectorAllZeros(N1.getNode()))
4768 // do not return N1, because an undef node may exist in N1
4769 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4770 SDLoc(N), N1.getValueType());
4772 // fold (and x, -1) -> x, vector edition
4773 if (ISD::isBuildVectorAllOnes(N0.getNode()))
4774 return N1;
4775 if (ISD::isBuildVectorAllOnes(N1.getNode()))
4776 return N0;
4779 // fold (and c1, c2) -> c1&c2
4780 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4781 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4782 if (N0C && N1C && !N1C->isOpaque())
4783 return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4784 // canonicalize constant to RHS
4785 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4786 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4787 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4788 // fold (and x, -1) -> x
4789 if (isAllOnesConstant(N1))
4790 return N0;
4791 // if (and x, c) is known to be zero, return 0
4792 unsigned BitWidth = VT.getScalarSizeInBits();
4793 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4794 APInt::getAllOnesValue(BitWidth)))
4795 return DAG.getConstant(0, SDLoc(N), VT);
4797 if (SDValue NewSel = foldBinOpIntoSelect(N))
4798 return NewSel;
4800 // reassociate and
4801 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4802 return RAND;
4804 // Try to convert a constant mask AND into a shuffle clear mask.
4805 if (VT.isVector())
4806 if (SDValue Shuffle = XformToShuffleWithZero(N))
4807 return Shuffle;
4809 // fold (and (or x, C), D) -> D if (C & D) == D
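// e.g. (and (or x, 0xFF), 0x0F) --> 0x0F: the OR forces every bit of the
// 0x0F mask to be set, so the AND result no longer depends on x.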
4810 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4811 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4813 if (N0.getOpcode() == ISD::OR &&
4814 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4815 return N1;
4816 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4817 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4818 SDValue N0Op0 = N0.getOperand(0);
4819 APInt Mask = ~N1C->getAPIntValue();
4820 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4821 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4822 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4823 N0.getValueType(), N0Op0);
4825 // Replace uses of the AND with uses of the Zero extend node.
4826 CombineTo(N, Zext);
4828 // We actually want to replace all uses of the any_extend with the
4829 // zero_extend, to avoid duplicating things. This will later cause this
4830 // AND to be folded.
4831 CombineTo(N0.getNode(), Zext);
4832 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4835 // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4836 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4837 // already be zero by virtue of the width of the base type of the load.
4839 // The 'X' node here can either be nothing or an extract_vector_elt to catch
4840 // more cases.
4841 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4842 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4843 N0.getOperand(0).getOpcode() == ISD::LOAD &&
4844 N0.getOperand(0).getResNo() == 0) ||
4845 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4846 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4847 N0 : N0.getOperand(0) );
4849 // Get the constant (if applicable) the zero'th operand is being ANDed with.
4850 // This can be a pure constant or a vector splat, in which case we treat the
4851 // vector as a scalar and use the splat value.
4852 APInt Constant = APInt::getNullValue(1);
4853 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4854 Constant = C->getAPIntValue();
4855 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4856 APInt SplatValue, SplatUndef;
4857 unsigned SplatBitSize;
4858 bool HasAnyUndefs;
4859 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4860 SplatBitSize, HasAnyUndefs);
4861 if (IsSplat) {
4862 // Undef bits can contribute to a possible optimisation if set, so
4863 // set them.
4864 SplatValue |= SplatUndef;
4866 // The splat value may be something like "0x00FFFFFF", which means 0 for
4867 // the first vector value and FF for the rest, repeating. We need a mask
4868 // that will apply equally to all members of the vector, so AND all the
4869 // lanes of the constant together.
4870 EVT VT = Vector->getValueType(0);
4871 unsigned BitWidth = VT.getScalarSizeInBits();
4873 // If the splat value has been compressed to a bitlength lower
4874 // than the size of the vector lane, we need to re-expand it to
4875 // the lane size.
4876 if (BitWidth > SplatBitSize)
4877 for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4878 SplatBitSize < BitWidth;
4879 SplatBitSize = SplatBitSize * 2)
4880 SplatValue |= SplatValue.shl(SplatBitSize);
4882 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4883 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4884 if (SplatBitSize % BitWidth == 0) {
4885 Constant = APInt::getAllOnesValue(BitWidth);
4886 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4887 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4892 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4893 // actually legal and isn't going to get expanded, else this is a false
4894 // optimisation.
4895 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4896 Load->getValueType(0),
4897 Load->getMemoryVT());
4899 // Resize the constant to the same size as the original memory access before
4900 // extension. If it is still the AllOnesValue then this AND is completely
4901 // unneeded.
4902 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4904 bool B;
4905 switch (Load->getExtensionType()) {
4906 default: B = false; break;
4907 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4908 case ISD::ZEXTLOAD:
4909 case ISD::NON_EXTLOAD: B = true; break;
4912 if (B && Constant.isAllOnesValue()) {
4913 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4914 // preserve semantics once we get rid of the AND.
4915 SDValue NewLoad(Load, 0);
4917 // Fold the AND away. NewLoad may get replaced immediately.
4918 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4920 if (Load->getExtensionType() == ISD::EXTLOAD) {
4921 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4922 Load->getValueType(0), SDLoc(Load),
4923 Load->getChain(), Load->getBasePtr(),
4924 Load->getOffset(), Load->getMemoryVT(),
4925 Load->getMemOperand());
4926 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4927 if (Load->getNumValues() == 3) {
4928 // PRE/POST_INC loads have 3 values.
4929 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4930 NewLoad.getValue(2) };
4931 CombineTo(Load, To, 3, true);
4932 } else {
4933 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4937 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4941 // fold (and (load x), 255) -> (zextload x, i8)
4942 // fold (and (extload x, i16), 255) -> (zextload x, i8)
4943 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4944 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4945 (N0.getOpcode() == ISD::ANY_EXTEND &&
4946 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4947 if (SDValue Res = ReduceLoadWidth(N)) {
4948 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4949 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4950 AddToWorklist(N);
4951 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
4952 return SDValue(N, 0);
4956 if (Level >= AfterLegalizeTypes) {
4957 // Attempt to propagate the AND back up to the leaves which, if they're
4958 // loads, can be combined to narrow loads and the AND node can be removed.
4959 // Perform after legalization so that extend nodes will already be
4960 // combined into the loads.
4961 if (BackwardsPropagateMask(N, DAG)) {
4962 return SDValue(N, 0);
4966 if (SDValue Combined = visitANDLike(N0, N1, N))
4967 return Combined;
4969 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4970 if (N0.getOpcode() == N1.getOpcode())
4971 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
4972 return V;
4974 // Masking the negated extension of a boolean is just the zero-extended
4975 // boolean:
4976 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4977 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4979 // Note: the SimplifyDemandedBits fold below can make an information-losing
4980 // transform, and then we have no way to find this better fold.
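// A concrete check of the fold: if X is true, zext(X) = 1 and (sub 0, 1) is
// all-ones, so (and all-ones, 1) = 1 = zext(X); if X is false, every step
// yields 0. The sext variant differs only in that (sub 0, sext(true)) =
// (sub 0, -1) = 1 before the masking.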
4981 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4982 if (isNullOrNullSplat(N0.getOperand(0))) {
4983 SDValue SubRHS = N0.getOperand(1);
4984 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4985 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4986 return SubRHS;
4987 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4988 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4989 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4993 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4994 // fold (and (sra)) -> (and (srl)) when possible.
4995 if (SimplifyDemandedBits(SDValue(N, 0)))
4996 return SDValue(N, 0);
4998 // fold (zext_inreg (extload x)) -> (zextload x)
4999 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
5000 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5001 EVT MemVT = LN0->getMemoryVT();
5002 // If we zero all the possible extended bits, then we can turn this into
5003 // a zextload if we are running before legalize or the operation is legal.
5004 unsigned BitWidth = N1.getScalarValueSizeInBits();
5005 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
5006 BitWidth - MemVT.getScalarSizeInBits())) &&
5007 ((!LegalOperations && !LN0->isVolatile()) ||
5008 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5009 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
5010 LN0->getChain(), LN0->getBasePtr(),
5011 MemVT, LN0->getMemOperand());
5012 AddToWorklist(N);
5013 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5014 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5017 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5018 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
5019 N0.hasOneUse()) {
5020 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5021 EVT MemVT = LN0->getMemoryVT();
5022 // If we zero all the possible extended bits, then we can turn this into
5023 // a zextload if we are running before legalize or the operation is legal.
5024 unsigned BitWidth = N1.getScalarValueSizeInBits();
5025 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
5026 BitWidth - MemVT.getScalarSizeInBits())) &&
5027 ((!LegalOperations && !LN0->isVolatile()) ||
5028 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5029 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
5030 LN0->getChain(), LN0->getBasePtr(),
5031 MemVT, LN0->getMemOperand());
5032 AddToWorklist(N);
5033 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5034 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5037 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5038 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5039 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5040 N0.getOperand(1), false))
5041 return BSwap;
5044 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5045 return Shifts;
5047 return SDValue();
5050 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
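/// For example, on i32 (illustrative values):
///   ((a & 0xFF00) >> 8) | ((a & 0xFF) << 8)
/// with a = 0xAABBCCDD yields 0x0000DDCC, which equals
/// (bswap 0xAABBCCDD) >> 16 = 0xDDCCBBAA >> 16.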
5051 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5052 bool DemandHighBits) {
5053 if (!LegalOperations)
5054 return SDValue();
5056 EVT VT = N->getValueType(0);
5057 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5058 return SDValue();
5059 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5060 return SDValue();
5062 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5063 bool LookPassAnd0 = false;
5064 bool LookPassAnd1 = false;
5065 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5066 std::swap(N0, N1);
5067 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5068 std::swap(N0, N1);
5069 if (N0.getOpcode() == ISD::AND) {
5070 if (!N0.getNode()->hasOneUse())
5071 return SDValue();
5072 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5073 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5074 // This is needed for X86.
5075 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5076 N01C->getZExtValue() != 0xFFFF))
5077 return SDValue();
5078 N0 = N0.getOperand(0);
5079 LookPassAnd0 = true;
5082 if (N1.getOpcode() == ISD::AND) {
5083 if (!N1.getNode()->hasOneUse())
5084 return SDValue();
5085 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5086 if (!N11C || N11C->getZExtValue() != 0xFF)
5087 return SDValue();
5088 N1 = N1.getOperand(0);
5089 LookPassAnd1 = true;
5092 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5093 std::swap(N0, N1);
5094 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5095 return SDValue();
5096 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5097 return SDValue();
5099 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5100 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5101 if (!N01C || !N11C)
5102 return SDValue();
5103 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5104 return SDValue();
5106 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5107 SDValue N00 = N0->getOperand(0);
5108 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5109 if (!N00.getNode()->hasOneUse())
5110 return SDValue();
5111 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5112 if (!N001C || N001C->getZExtValue() != 0xFF)
5113 return SDValue();
5114 N00 = N00.getOperand(0);
5115 LookPassAnd0 = true;
5118 SDValue N10 = N1->getOperand(0);
5119 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5120 if (!N10.getNode()->hasOneUse())
5121 return SDValue();
5122 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5123 // Also allow 0xFFFF since the bits will be shifted out. This is needed
5124 // for X86.
5125 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5126 N101C->getZExtValue() != 0xFFFF))
5127 return SDValue();
5128 N10 = N10.getOperand(0);
5129 LookPassAnd1 = true;
5132 if (N00 != N10)
5133 return SDValue();
5135 // Make sure everything beyond the low halfword gets set to zero since the SRL
5136 // 16 will clear the top bits.
5137 unsigned OpSizeInBits = VT.getSizeInBits();
5138 if (DemandHighBits && OpSizeInBits > 16) {
5139 // If the left-shift isn't masked out then the only way this is a bswap is
5140 // if all bits beyond the low 8 are 0. In that case the entire pattern
5141 // reduces to a left shift anyway: leave it for other parts of the combiner.
5142 if (!LookPassAnd0)
5143 return SDValue();
5145 // However, if the right shift isn't masked out then it might be because
5146 // it's not needed. See if we can spot that too.
5147 if (!LookPassAnd1 &&
5148 !DAG.MaskedValueIsZero(
5149 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5150 return SDValue();
5153 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5154 if (OpSizeInBits > 16) {
5155 SDLoc DL(N);
5156 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5157 DAG.getConstant(OpSizeInBits - 16, DL,
5158 getShiftAmountTy(VT)));
5160 return Res;
5163 /// Return true if the specified node is an element that makes up a 32-bit
5164 /// packed halfword byteswap.
5165 /// ((x & 0x000000ff) << 8) |
5166 /// ((x & 0x0000ff00) >> 8) |
5167 /// ((x & 0x00ff0000) << 8) |
5168 /// ((x & 0xff000000) >> 8)
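/// Each element records the node that sources its byte in Parts, indexed by
/// the byte position its mask selects: e.g. the ((x & 0x0000ff00) >> 8)
/// element fills Parts[1]. Matching fails if two elements claim the same
/// byte.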
5169 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5170 if (!N.getNode()->hasOneUse())
5171 return false;
5173 unsigned Opc = N.getOpcode();
5174 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5175 return false;
5177 SDValue N0 = N.getOperand(0);
5178 unsigned Opc0 = N0.getOpcode();
5179 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5180 return false;
5182 ConstantSDNode *N1C = nullptr;
5183 // SHL or SRL: look upstream for AND mask operand
5184 if (Opc == ISD::AND)
5185 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5186 else if (Opc0 == ISD::AND)
5187 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5188 if (!N1C)
5189 return false;
5191 unsigned MaskByteOffset;
5192 switch (N1C->getZExtValue()) {
5193 default:
5194 return false;
5195 case 0xFF: MaskByteOffset = 0; break;
5196 case 0xFF00: MaskByteOffset = 1; break;
5197 case 0xFFFF:
5198 // In case demanded bits didn't clear the bits that will be shifted out.
5199 // This is needed for X86.
5200 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5201 MaskByteOffset = 1;
5202 break;
5204 return false;
5205 case 0xFF0000: MaskByteOffset = 2; break;
5206 case 0xFF000000: MaskByteOffset = 3; break;
5209 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5210 if (Opc == ISD::AND) {
5211 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5212 // (x >> 8) & 0xff
5213 // (x >> 8) & 0xff0000
5214 if (Opc0 != ISD::SRL)
5215 return false;
5216 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5217 if (!C || C->getZExtValue() != 8)
5218 return false;
5219 } else {
5220 // (x << 8) & 0xff00
5221 // (x << 8) & 0xff000000
5222 if (Opc0 != ISD::SHL)
5223 return false;
5224 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5225 if (!C || C->getZExtValue() != 8)
5226 return false;
5228 } else if (Opc == ISD::SHL) {
5229 // (x & 0xff) << 8
5230 // (x & 0xff0000) << 8
5231 if (MaskByteOffset != 0 && MaskByteOffset != 2)
5232 return false;
5233 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5234 if (!C || C->getZExtValue() != 8)
5235 return false;
5236 } else { // Opc == ISD::SRL
5237 // (x & 0xff00) >> 8
5238 // (x & 0xff000000) >> 8
5239 if (MaskByteOffset != 1 && MaskByteOffset != 3)
5240 return false;
5241 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5242 if (!C || C->getZExtValue() != 8)
5243 return false;
5246 if (Parts[MaskByteOffset])
5247 return false;
5249 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5250 return true;
5253 /// Match a 32-bit packed halfword bswap. That is
5254 /// ((x & 0x000000ff) << 8) |
5255 /// ((x & 0x0000ff00) >> 8) |
5256 /// ((x & 0x00ff0000) << 8) |
5257 /// ((x & 0xff000000) >> 8)
5258 /// => (rotl (bswap x), 16)
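/// For example, with x = 0xAABBCCDD each of the four elements contributes
/// one byte: 0xBB000000 | 0x00AA0000 | 0x0000DD00 | 0x000000CC = 0xBBAADDCC,
/// and indeed (rotl (bswap 0xAABBCCDD), 16) = (rotl 0xDDCCBBAA, 16) =
/// 0xBBAADDCC.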
5259 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5260 if (!LegalOperations)
5261 return SDValue();
5263 EVT VT = N->getValueType(0);
5264 if (VT != MVT::i32)
5265 return SDValue();
5266 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5267 return SDValue();
5269 // Look for either
5270 // (or (or (and), (and)), (or (and), (and)))
5271 // (or (or (or (and), (and)), (and)), (and))
5272 if (N0.getOpcode() != ISD::OR)
5273 return SDValue();
5274 SDValue N00 = N0.getOperand(0);
5275 SDValue N01 = N0.getOperand(1);
5276 SDNode *Parts[4] = {};
5278 if (N1.getOpcode() == ISD::OR &&
5279 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5280 // (or (or (and), (and)), (or (and), (and)))
5281 if (!isBSwapHWordElement(N00, Parts))
5282 return SDValue();
5284 if (!isBSwapHWordElement(N01, Parts))
5285 return SDValue();
5286 SDValue N10 = N1.getOperand(0);
5287 if (!isBSwapHWordElement(N10, Parts))
5288 return SDValue();
5289 SDValue N11 = N1.getOperand(1);
5290 if (!isBSwapHWordElement(N11, Parts))
5291 return SDValue();
5292 } else {
5293 // (or (or (or (and), (and)), (and)), (and))
5294 if (!isBSwapHWordElement(N1, Parts))
5295 return SDValue();
5296 if (!isBSwapHWordElement(N01, Parts))
5297 return SDValue();
5298 if (N00.getOpcode() != ISD::OR)
5299 return SDValue();
5300 SDValue N000 = N00.getOperand(0);
5301 if (!isBSwapHWordElement(N000, Parts))
5302 return SDValue();
5303 SDValue N001 = N00.getOperand(1);
5304 if (!isBSwapHWordElement(N001, Parts))
5305 return SDValue();
5308 // Make sure the parts are all coming from the same node.
5309 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5310 return SDValue();
5312 SDLoc DL(N);
5313 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5314 SDValue(Parts[0], 0));
5316 // Result of the bswap should be rotated by 16. If it's not legal, then
5317 // do (x << 16) | (x >> 16).
5318 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5319 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5320 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5321 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5322 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5323 return DAG.getNode(ISD::OR, DL, VT,
5324 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5325 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5328 /// This contains all DAGCombine rules which reduce two values combined by
5329 /// an Or operation to a single value \see visitANDLike().
5330 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5331 EVT VT = N1.getValueType();
5332 SDLoc DL(N);
5334 // fold (or x, undef) -> -1
5335 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5336 return DAG.getAllOnesConstant(DL, VT);
5338 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5339 return V;
5341 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
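// e.g. if X is known to have only low-byte bits set and Y only bits in
// 0xFF00 (hypothetical masks):
//   (or (and X, 0xFF), (and Y, 0xFF00)) --> (and (or X, Y), 0xFFFF)
// trading two ANDs for one.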
5342 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5343 // Don't increase # computations.
5344 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5345 // We can only do this xform if we know that bits from X that are set in C2
5346 // but not in C1 are already zero. Likewise for Y.
5347 if (const ConstantSDNode *N0O1C =
5348 getAsNonOpaqueConstant(N0.getOperand(1))) {
5349 if (const ConstantSDNode *N1O1C =
5350 getAsNonOpaqueConstant(N1.getOperand(1))) {
5351 // We can only do this xform if we know that bits from X that are set in
5352 // C2 but not in C1 are already zero. Likewise for Y.
5353 const APInt &LHSMask = N0O1C->getAPIntValue();
5354 const APInt &RHSMask = N1O1C->getAPIntValue();
5356 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5357 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5358 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5359 N0.getOperand(0), N1.getOperand(0));
5360 return DAG.getNode(ISD::AND, DL, VT, X,
5361 DAG.getConstant(LHSMask | RHSMask, DL, VT));
5367 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5368 if (N0.getOpcode() == ISD::AND &&
5369 N1.getOpcode() == ISD::AND &&
5370 N0.getOperand(0) == N1.getOperand(0) &&
5371 // Don't increase # computations.
5372 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5373 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5374 N0.getOperand(1), N1.getOperand(1));
5375 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5378 return SDValue();
5381 /// OR combines for which the commuted variant will be tried as well.
5382 static SDValue visitORCommutative(
5383 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5384 EVT VT = N0.getValueType();
5385 if (N0.getOpcode() == ISD::AND) {
5386 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5387 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5388 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5390 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5391 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5392 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5395 return SDValue();
5398 SDValue DAGCombiner::visitOR(SDNode *N) {
5399 SDValue N0 = N->getOperand(0);
5400 SDValue N1 = N->getOperand(1);
5401 EVT VT = N1.getValueType();
5403 // x | x --> x
5404 if (N0 == N1)
5405 return N0;
5407 // fold vector ops
5408 if (VT.isVector()) {
5409 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5410 return FoldedVOp;
5412 // fold (or x, 0) -> x, vector edition
5413 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5414 return N1;
5415 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5416 return N0;
5418 // fold (or x, -1) -> -1, vector edition
5419 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5420 // do not return N0, because an undef node may exist in N0
5421 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5422 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5423 // do not return N1, because an undef node may exist in N1
5424 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5426 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5427 // Do this only if the resulting shuffle is legal.
5428 if (isa<ShuffleVectorSDNode>(N0) &&
5429 isa<ShuffleVectorSDNode>(N1) &&
5430 // Avoid folding a node with illegal type.
5431 TLI.isTypeLegal(VT)) {
5432 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5433 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5434 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5435 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5436 // Ensure both shuffles have a zero input.
5437 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5438 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5439 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5440 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5441 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5442 bool CanFold = true;
5443 int NumElts = VT.getVectorNumElements();
5444 SmallVector<int, 4> Mask(NumElts);
5446 for (int i = 0; i != NumElts; ++i) {
5447 int M0 = SV0->getMaskElt(i);
5448 int M1 = SV1->getMaskElt(i);
5450 // Determine if either index is pointing to a zero vector.
5451 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5452 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5454 // If one element is zero and the other side is undef, keep undef.
5455 // This also handles the case that both are undef.
5456 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5457 Mask[i] = -1;
5458 continue;
5461 // Make sure only one of the elements is zero.
5462 if (M0Zero == M1Zero) {
5463 CanFold = false;
5464 break;
5467 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5469 // We have a zero and non-zero element. If the non-zero came from
5470 // SV0 make the index a LHS index. If it came from SV1, make it
5471 // a RHS index. We need to mod by NumElts because we don't care
5472 // which operand it came from in the original shuffles.
5473 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5476 if (CanFold) {
5477 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5478 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5480 bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5481 if (!LegalMask) {
5482 std::swap(NewLHS, NewRHS);
5483 ShuffleVectorSDNode::commuteMask(Mask);
5484 LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5487 if (LegalMask)
5488 return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5494 // fold (or c1, c2) -> c1|c2
5495 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5496 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5497 if (N0C && N1C && !N1C->isOpaque())
5498 return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5499 // canonicalize constant to RHS
5500 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5501 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5502 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5503 // fold (or x, 0) -> x
5504 if (isNullConstant(N1))
5505 return N0;
5506 // fold (or x, -1) -> -1
5507 if (isAllOnesConstant(N1))
5508 return N1;
5510 if (SDValue NewSel = foldBinOpIntoSelect(N))
5511 return NewSel;
5513 // fold (or x, c) -> c iff (x & ~c) == 0
5514 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5515 return N1;
5517 if (SDValue Combined = visitORLike(N0, N1, N))
5518 return Combined;
5520 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5521 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5522 return BSwap;
5523 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5524 return BSwap;
5526 // reassociate or
5527 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5528 return ROR;
5530 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5531 // iff (c1 & c2) != 0 or c1/c2 are undef.
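// e.g. (or (and X, 0x0F), 0x01) --> (and (or X, 0x01), 0x0F), since
// 0x0F | 0x01 == 0x0F; the identity (x & c1) | c2 == (x | c2) & (c1 | c2)
// holds bitwise for any constants.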
5532 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5533 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5535 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5536 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5537 if (SDValue COR = DAG.FoldConstantArithmetic(
5538 ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5539 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5540 AddToWorklist(IOR.getNode());
5541 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5545 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
5546 return Combined;
5547 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
5548 return Combined;
5550 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
5551 if (N0.getOpcode() == N1.getOpcode())
5552 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5553 return V;
5555 // See if this is some rotate idiom.
5556 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5557 return SDValue(Rot, 0);
5559 if (SDValue Load = MatchLoadCombine(N))
5560 return Load;
5562 // Simplify the operands using demanded-bits information.
5563 if (SimplifyDemandedBits(SDValue(N, 0)))
5564 return SDValue(N, 0);
5566 // If OR can be rewritten into ADD, try combines based on ADD.
5567 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
5568 DAG.haveNoCommonBitsSet(N0, N1))
5569 if (SDValue Combined = visitADDLike(N))
5570 return Combined;
5572 return SDValue();
5575 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5576 if (Op.getOpcode() == ISD::AND &&
5577 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5578 Mask = Op.getOperand(1);
5579 return Op.getOperand(0);
5581 return Op;
5584 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5585 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5586 SDValue &Mask) {
5587 Op = stripConstantMask(DAG, Op, Mask);
5588 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5589 Shift = Op;
5590 return true;
5592 return false;
5595 /// Helper function for visitOR to extract the needed side of a rotate idiom
5596 /// from a shl/srl/mul/udiv. This is meant to handle cases where
5597 /// InstCombine merged some outside op with one of the shifts from
5598 /// the rotate pattern.
5599 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5600 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5601 /// patterns:
5603 /// (or (mul v c0) (srl (mul v c1) c2)):
5604 /// expands (mul v c0) -> (shl (mul v c1) c3)
5606 /// (or (udiv v c0) (shl (udiv v c1) c2)):
5607 /// expands (udiv v c0) -> (srl (udiv v c1) c3)
5609 /// (or (shl v c0) (srl (shl v c1) c2)):
5610 /// expands (shl v c0) -> (shl (shl v c1) c3)
5612 /// (or (srl v c0) (shl (srl v c1) c2)):
5613 /// expands (srl v c0) -> (srl (srl v c1) c3)
5615 /// Such that in all cases, c3+c2==bitwidth(op v c1).
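/// For example, on i32 (hypothetical constants):
///   (or (mul v 32) (srl (mul v 2) 28))
/// has c0 = 32, c1 = 2, c2 = 28, so c3 = 32 - 28 = 4, and 32 == 2 << 4;
/// (mul v 32) is rewritten to (shl (mul v 2) 4), exposing the rotate
/// (rotl (mul v 2), 4).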
5616 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5617 SDValue ExtractFrom, SDValue &Mask,
5618 const SDLoc &DL) {
5619 assert(OppShift && ExtractFrom && "Empty SDValue");
5620 assert(
5621 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5622 "Existing shift must be valid as a rotate half");
5624 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5625 // Preconditions:
5626 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5628 // Find opcode of the needed shift to be extracted from (op0 v c0).
5629 unsigned Opcode = ISD::DELETED_NODE;
5630 bool IsMulOrDiv = false;
5631 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5632 // opcode or its arithmetic (mul or udiv) variant.
5633 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5634 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5635 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5636 return false;
5637 Opcode = NeededShift;
5638 return true;
5640 // op0 must be either the needed shift opcode or the mul/udiv equivalent
5641 // that the needed shift can be extracted from.
5642 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5643 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5644 return SDValue();
5646 // op0 must be the same opcode on both sides, have the same LHS argument,
5647 // and produce the same value type.
5648 SDValue OppShiftLHS = OppShift.getOperand(0);
5649 EVT ShiftedVT = OppShiftLHS.getValueType();
5650 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5651 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5652 ShiftedVT != ExtractFrom.getValueType())
5653 return SDValue();
5655 // Amount of the existing shift.
5656 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5657 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5658 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5659 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5660 ConstantSDNode *ExtractFromCst =
5661 isConstOrConstSplat(ExtractFrom.getOperand(1));
5662 // TODO: We should be able to handle non-uniform constant vectors for these values
5663 // Check that we have constant values.
5664 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5665 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5666 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5667 return SDValue();
5669 // Compute the shift amount we need to extract to complete the rotate.
5670 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5671 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5672 return SDValue();
5673 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5674 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5675 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5676 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5677 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5679 // Now try extract the needed shift from the ExtractFrom op and see if the
5680 // result matches up with the existing shift's LHS op.
5681 if (IsMulOrDiv) {
5682 // Op to extract from is a mul or udiv by a constant.
5683 // Check:
5684 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5685 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5686 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5687 NeededShiftAmt.getZExtValue());
5688 APInt ResultAmt;
5689 APInt Rem;
5690 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5691 if (Rem != 0 || ResultAmt != OppLHSAmt)
5692 return SDValue();
5693 } else {
5694 // Op to extract from is a shift by a constant.
5695 // Check:
5696 // c2 - (bitwidth(op0 v c0) - c1) == c0
5697 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5698 ExtractFromAmt.getBitWidth()))
5699 return SDValue();
5702 // Return the expanded shift op that should allow a rotate to be formed.
5703 EVT ShiftVT = OppShift.getOperand(1).getValueType();
5704 EVT ResVT = ExtractFrom.getValueType();
5705 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5706 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5709 // Return true if we can prove that, whenever Neg and Pos are both in the
5710 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
5711 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5713 // (or (shift1 X, Neg), (shift2 X, Pos))
5715 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5716 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
5717 // to consider shift amounts with defined behavior.
5718 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5719 SelectionDAG &DAG) {
5720 // If EltSize is a power of 2 then:
5722 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5723 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5725 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5726 // for the stronger condition:
5728 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
5730 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5731 // we can just replace Neg with Neg' for the rest of the function.
5733 // In other cases we check for the even stronger condition:
5735 // Neg == EltSize - Pos [B]
5737 // for all Neg and Pos. Note that the (or ...) then invokes undefined
5738 // behavior if Pos == 0 (and consequently Neg == EltSize).
5740 // We could actually use [A] whenever EltSize is a power of 2, but the
5741 // only extra cases that it would match are those uninteresting ones
5742 // where Neg and Pos are never in range at the same time. E.g. for
5743 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5744 // as well as (sub 32, Pos), but:
5746 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5748 // always invokes undefined behavior for 32-bit X.
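// For example, with EltSize == 32, Neg == (and (sub 32, Pos), 31)
// satisfies [A], since rotating one way by Pos and the other way by
// (32 - Pos) & 31 selects the same result for every Pos in [0, 32).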
5750 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5751 unsigned MaskLoBits = 0;
5752 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5753 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5754 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
5755 unsigned Bits = Log2_64(EltSize);
5756 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5757 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5758 Neg = Neg.getOperand(0);
5759 MaskLoBits = Bits;
5764 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5765 if (Neg.getOpcode() != ISD::SUB)
5766 return false;
5767 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5768 if (!NegC)
5769 return false;
5770 SDValue NegOp1 = Neg.getOperand(1);
5772 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5773 // Pos'. The truncation is redundant for the purpose of the equality.
5774 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5775 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5776 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
5777 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5778 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5779 MaskLoBits))
5780 Pos = Pos.getOperand(0);
5784 // The condition we need is now:
5786 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5788 // If NegOp1 == Pos then we need:
5790 // EltSize & Mask == NegC & Mask
5792 // (because "x & Mask" is a truncation and distributes through subtraction).
5793 APInt Width;
5794 if (Pos == NegOp1)
5795 Width = NegC->getAPIntValue();
5797 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5798 // Then the condition we want to prove becomes:
5800 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5802 // which, again because "x & Mask" is a truncation, becomes:
5804 // NegC & Mask == (EltSize - PosC) & Mask
5805 // EltSize & Mask == (NegC + PosC) & Mask
5806 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5807 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5808 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5809 else
5810 return false;
5811 } else
5812 return false;
5814 // Now we just need to check that EltSize & Mask == Width & Mask.
5815 if (MaskLoBits)
5816 // EltSize & Mask is 0 since Mask is EltSize - 1.
5817 return Width.getLoBits(MaskLoBits) == 0;
5818 return Width == EltSize;
5821 // A subroutine of MatchRotate used once we have found an OR of two opposite
5822 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
5823 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5824 // former being preferred if supported. InnerPos and InnerNeg are Pos and
5825 // Neg with outer conversions stripped away.
5826 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5827 SDValue Neg, SDValue InnerPos,
5828 SDValue InnerNeg, unsigned PosOpcode,
5829 unsigned NegOpcode, const SDLoc &DL) {
5830 // fold (or (shl x, (*ext y)),
5831 // (srl x, (*ext (sub 32, y)))) ->
5832 // (rotl x, y) or (rotr x, (sub 32, y))
5834 // fold (or (shl x, (*ext (sub 32, y))),
5835 // (srl x, (*ext y))) ->
5836 // (rotr x, y) or (rotl x, (sub 32, y))
5837 EVT VT = Shifted.getValueType();
5838 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5839 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5840 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5841 HasPos ? Pos : Neg).getNode();
5844 return nullptr;
5847 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
5848 // idioms for rotate, and if the target supports rotation instructions, generate
5849 // a rot[lr].
5850 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5851 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
5852 EVT VT = LHS.getValueType();
5853 if (!TLI.isTypeLegal(VT)) return nullptr;
5855 // The target must have at least one rotate flavor.
5856 bool HasROTL = hasOperation(ISD::ROTL, VT);
5857 bool HasROTR = hasOperation(ISD::ROTR, VT);
5858 if (!HasROTL && !HasROTR) return nullptr;
5860 // Check for truncated rotate.
5861 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5862 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5863 assert(LHS.getValueType() == RHS.getValueType());
5864 if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5865 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5866 SDValue(Rot, 0)).getNode();
5870 // Match "(X shl/srl V1) & V2" where V2 may not be present.
5871 SDValue LHSShift; // The shift.
5872 SDValue LHSMask; // AND value if any.
5873 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5875 SDValue RHSShift; // The shift.
5876 SDValue RHSMask; // AND value if any.
5877 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5879 // If neither side matched a rotate half, bail
5880 if (!LHSShift && !RHSShift)
5881 return nullptr;
5883 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5884 // side of the rotate, so try to handle that here. In all cases we need to
5885 // pass the matched shift from the opposite side to compute the opcode and
5886 // needed shift amount to extract. We still want to do this if both sides
5887 // matched a rotate half because one half may be a potential overshift that
5888 // can be broken down (ie if InstCombine merged two shl or srl ops into a
5889 // single one).
5891 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5892 if (LHSShift)
5893 if (SDValue NewRHSShift =
5894 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5895 RHSShift = NewRHSShift;
5896 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5897 if (RHSShift)
5898 if (SDValue NewLHSShift =
5899 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5900 LHSShift = NewLHSShift;
5902 // If a side is still missing, nothing else we can do.
5903 if (!RHSShift || !LHSShift)
5904 return nullptr;
5906 // At this point we've matched or extracted a shift op on each side.
5908 if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5909 return nullptr; // Not shifting the same value.
5911 if (LHSShift.getOpcode() == RHSShift.getOpcode())
5912 return nullptr; // Shifts must disagree.
5914 // Canonicalize shl to left side in a shl/srl pair.
5915 if (RHSShift.getOpcode() == ISD::SHL) {
5916 std::swap(LHS, RHS);
5917 std::swap(LHSShift, RHSShift);
5918 std::swap(LHSMask, RHSMask);
5921 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5922 SDValue LHSShiftArg = LHSShift.getOperand(0);
5923 SDValue LHSShiftAmt = LHSShift.getOperand(1);
5924 SDValue RHSShiftArg = RHSShift.getOperand(0);
5925 SDValue RHSShiftAmt = RHSShift.getOperand(1);
5927 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5928 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
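// e.g. on i32: (or (shl x, 8), (srl x, 24)) --> (rotl x, 8), because
// 8 + 24 == 32.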
5929 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5930 ConstantSDNode *RHS) {
5931 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5933 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5934 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5935 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5937 // If there is an AND of either shifted operand, apply it to the result.
5938 if (LHSMask.getNode() || RHSMask.getNode()) {
5939 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5940 SDValue Mask = AllOnes;
5942 if (LHSMask.getNode()) {
5943 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5944 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5945 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5947 if (RHSMask.getNode()) {
5948 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5949 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5950 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5953 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5956 return Rot.getNode();
5959 // If there is a mask here, and we have a variable shift, we can't be sure
5960 // that we're masking out the right stuff.
5961 if (LHSMask.getNode() || RHSMask.getNode())
5962 return nullptr;
5964 // If the shift amount is sign/zext/any-extended just peel it off.
5965 SDValue LExtOp0 = LHSShiftAmt;
5966 SDValue RExtOp0 = RHSShiftAmt;
5967 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5968 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5969 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5970 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5971 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5972 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5973 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5974 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5975 LExtOp0 = LHSShiftAmt.getOperand(0);
5976 RExtOp0 = RHSShiftAmt.getOperand(0);
5979 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5980 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5981 if (TryL)
5982 return TryL;
5984 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5985 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5986 if (TryR)
5987 return TryR;
5989 return nullptr;
5992 namespace {
5994 /// Represents the known origin of an individual byte in a load combine pattern. The
5995 /// value of the byte is either constant zero or comes from memory.
5996 struct ByteProvider {
5997 // For constant zero providers Load is set to nullptr. For memory providers
5998 // Load represents the node which loads the byte from memory.
5999 // ByteOffset is the offset of the byte in the value produced by the load.
6000 LoadSDNode *Load = nullptr;
6001 unsigned ByteOffset = 0;
6003 ByteProvider() = default;
6005 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6006 return ByteProvider(Load, ByteOffset);
6009 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6011 bool isConstantZero() const { return !Load; }
6012 bool isMemory() const { return Load; }
6014 bool operator==(const ByteProvider &Other) const {
6015 return Other.Load == Load && Other.ByteOffset == ByteOffset;
6018 private:
6019 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6020 : Load(Load), ByteOffset(ByteOffset) {}
6023 } // end anonymous namespace
6025 /// Recursively traverses the expression, calculating the origin of the requested
6026 /// byte of the given value. Returns None if the provider can't be calculated.
6028 /// For every value except the root of the expression, verifies that it has
6029 /// exactly one use; if not, returns None. This way, if the origin of the
6030 /// byte is returned, it is guaranteed that the values which contribute to
6031 /// the byte are not used outside of this expression.
6033 /// Because the parts of the expression are not allowed to have more than one
6034 /// use, this function iterates over trees, not DAGs. So it never visits the same
6035 /// node more than once.
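/// For example (a hypothetical query), requesting byte 1 of
///   (or (zext i8->i32 (load p)), (shl (zext i8->i32 (load q)), 8))
/// returns ByteProvider::getMemory(load q, 0): the OR recurses into both
/// sides, the zext of (load p) provides constant zero past its narrow
/// width, and the shl by 8 shifts the request down to byte 0 of (load q).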
6036 static const Optional<ByteProvider>
6037 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6038 bool Root = false) {
6039 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6040 if (Depth == 10)
6041 return None;
6043 if (!Root && !Op.hasOneUse())
6044 return None;
6046 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6047 unsigned BitWidth = Op.getValueSizeInBits();
6048 if (BitWidth % 8 != 0)
6049 return None;
6050 unsigned ByteWidth = BitWidth / 8;
6051 assert(Index < ByteWidth && "invalid index requested");
6052 (void) ByteWidth;
6054 switch (Op.getOpcode()) {
6055 case ISD::OR: {
6056 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6057 if (!LHS)
6058 return None;
6059 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6060 if (!RHS)
6061 return None;
6063 if (LHS->isConstantZero())
6064 return RHS;
6065 if (RHS->isConstantZero())
6066 return LHS;
6067 return None;
6069 case ISD::SHL: {
6070 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6071 if (!ShiftOp)
6072 return None;
6074 uint64_t BitShift = ShiftOp->getZExtValue();
6075 if (BitShift % 8 != 0)
6076 return None;
6077 uint64_t ByteShift = BitShift / 8;
6079 return Index < ByteShift
6080 ? ByteProvider::getConstantZero()
6081 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6082 Depth + 1);
6084 case ISD::ANY_EXTEND:
6085 case ISD::SIGN_EXTEND:
6086 case ISD::ZERO_EXTEND: {
6087 SDValue NarrowOp = Op->getOperand(0);
6088 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6089 if (NarrowBitWidth % 8 != 0)
6090 return None;
6091 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6093 if (Index >= NarrowByteWidth)
6094 return Op.getOpcode() == ISD::ZERO_EXTEND
6095 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6096 : None;
6097 return calculateByteProvider(NarrowOp, Index, Depth + 1);
6099 case ISD::BSWAP:
6100 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6101 Depth + 1);
6102 case ISD::LOAD: {
6103 auto L = cast<LoadSDNode>(Op.getNode());
6104 if (L->isVolatile() || L->isIndexed())
6105 return None;
6107 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6108 if (NarrowBitWidth % 8 != 0)
6109 return None;
6110 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6112 if (Index >= NarrowByteWidth)
6113 return L->getExtensionType() == ISD::ZEXTLOAD
6114 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6115 : None;
6116 return ByteProvider::getMemory(L, Index);
6120 return None;
6123 static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
6124 return i;
6127 static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
6128 return BW - i - 1;
6131 // Check if the byte offsets we are looking at match either a big- or a
6132 // little-endian value load. Return true for big endian, false for little
6133 // endian, and None if the match failed.
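// For example, width-4 offsets {0, 1, 2, 3} (relative to FirstOffset)
// match little endian, {3, 2, 1, 0} match big endian, and {0, 2, 1, 3}
// match neither, so None is returned.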
6134 static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
6135 int64_t FirstOffset) {
6136 // Endianness can only be determined when the value is at least 2 bytes wide.
6137 unsigned Width = ByteOffsets.size();
6138 if (Width < 2)
6139 return None;
6141 bool BigEndian = true, LittleEndian = true;
6142 for (unsigned i = 0; i < Width; i++) {
6143 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6144 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6145 BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6146 if (!BigEndian && !LittleEndian)
6147 return None;
6150 assert((BigEndian != LittleEndian) && "It should be either big endian or "
6151 "little endian");
6152 return BigEndian;
6155 /// Match a pattern where a wide type scalar value is loaded by several narrow
6156 /// loads and combined by shifts and ors. Fold it into a single load or a load
6157 /// and a BSWAP if the target supports it.
6159 /// Assuming little endian target:
6160 /// i8 *a = ...
6161 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6162 /// =>
6163 /// i32 val = *((i32)a)
6165 /// i8 *a = ...
6166 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
6167 /// =>
6168 /// i32 val = BSWAP(*((i32)a))
6170 /// TODO: This rule matches complex patterns with OR node roots and doesn't
6171 /// interact well with the worklist mechanism. When a part of the pattern is
6172 /// updated (e.g. one of the loads) its direct users are put into the worklist,
6173 /// but the root node of the pattern which triggers the load combine is not
6174 /// necessarily a direct user of the changed node. For example, once the address
6175 /// of the t28 load is reassociated, the load combine won't be triggered:
6176 /// t25: i32 = add t4, Constant:i32<2>
6177 /// t26: i64 = sign_extend t25
6178 /// t27: i64 = add t2, t26
6179 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
6180 /// t29: i32 = zero_extend t28
6181 /// t32: i32 = shl t29, Constant:i8<8>
6182 /// t33: i32 = or t23, t32
6183 /// As a possible fix visitLoad can check if the load can be a part of a load
6184 /// combine pattern and add corresponding OR roots to the worklist.
6185 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6186 assert(N->getOpcode() == ISD::OR &&
6187 "Can only match load combining against OR nodes");
6189 // Handles simple types only
6190 EVT VT = N->getValueType(0);
6191 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6192 return SDValue();
6193 unsigned ByteWidth = VT.getSizeInBits() / 8;
6195 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6196 // Before legalization we can introduce illegal loads that are too wide and
6197 // will later be split into legal-sized loads. This enables us to combine
6198 // i64-load-by-i8 patterns into a couple of i32 loads on 32-bit targets.
6199 if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
6200 return SDValue();
6202 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6203 auto MemoryByteOffset = [&] (ByteProvider P) {
6204 assert(P.isMemory() && "Must be a memory byte provider");
6205 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6206 assert(LoadBitWidth % 8 == 0 &&
6207 "can only analyze providers for individual bytes not bit");
6208 unsigned LoadByteWidth = LoadBitWidth / 8;
6209 return IsBigEndianTarget
6210 ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6211 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6214 Optional<BaseIndexOffset> Base;
6215 SDValue Chain;
6217 SmallPtrSet<LoadSDNode *, 8> Loads;
6218 Optional<ByteProvider> FirstByteProvider;
6219 int64_t FirstOffset = INT64_MAX;
6221 // Check if all the bytes of the OR we are looking at are loaded from the same
6222 // base address. Collect byte offsets from the Base address in ByteOffsets.
6223 SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6224 for (unsigned i = 0; i < ByteWidth; i++) {
6225 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6226 if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6227 return SDValue();
6229 LoadSDNode *L = P->Load;
6230 assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
6231 "Must be enforced by calculateByteProvider");
6232 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6234 // All loads must share the same chain
6235 SDValue LChain = L->getChain();
6236 if (!Chain)
6237 Chain = LChain;
6238 else if (Chain != LChain)
6239 return SDValue();
6241 // Loads must share the same base address
6242 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6243 int64_t ByteOffsetFromBase = 0;
6244 if (!Base)
6245 Base = Ptr;
6246 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6247 return SDValue();
6249 // Calculate the offset of the current byte from the base address
6250 ByteOffsetFromBase += MemoryByteOffset(*P);
6251 ByteOffsets[i] = ByteOffsetFromBase;
6253 // Remember the first byte load
6254 if (ByteOffsetFromBase < FirstOffset) {
6255 FirstByteProvider = P;
6256 FirstOffset = ByteOffsetFromBase;
6259 Loads.insert(L);
6261 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6262 "memory, so there must be at least one load which produces the value");
6263 assert(Base && "Base address of the accessed memory location must be set");
6264 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6266 // Check if the bytes of the OR we are looking at match with either big or
6267 // little endian value load
6268 Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6269 if (!IsBigEndian.hasValue())
6270 return SDValue();
6272 assert(FirstByteProvider && "must be set");
6274 // Ensure that the first byte is loaded from offset zero of the first load,
6275 // so the combined value can be loaded from the first load's address.
6276 if (MemoryByteOffset(*FirstByteProvider) != 0)
6277 return SDValue();
6278 LoadSDNode *FirstLoad = FirstByteProvider->Load;
6280 // The node we are looking at matches with the pattern, check if we can
6281 // replace it with a single load and bswap if needed.
6283 // If the load needs a byte swap, check if the target supports it
6284 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
6286 // Before legalization we can introduce illegal bswaps which will later be
6287 // converted to an explicit bswap sequence. This way we end up with a single
6288 // load and byte shuffling instead of several loads and byte shuffling.
6289 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6290 return SDValue();
6292 // Check that a load of the wide type is both allowed and fast on the target
6293 bool Fast = false;
6294 bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6295 VT, FirstLoad->getAddressSpace(),
6296 FirstLoad->getAlignment(), &Fast);
6297 if (!Allowed || !Fast)
6298 return SDValue();
6300 SDValue NewLoad =
6301 DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6302 FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6304 // Transfer chain users from old loads to the new load.
6305 for (LoadSDNode *L : Loads)
6306 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6308 return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6311 // If the target has andn, bsl, or a similar bit-select instruction,
6312 // we want to unfold masked merge, with canonical pattern of:
6313 // | A | |B|
6314 // ((x ^ y) & m) ^ y
6315 // | D |
6316 // Into:
6317 // (x & m) | (y & ~m)
6318 // If y is a constant, and the 'andn' does not work with immediates,
6319 // we unfold into a different pattern:
6320 // ~(~x & m) & (m | y)
6321 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6322 // the very least that breaks andnpd / andnps patterns, and because those
6323 // patterns are simplified in IR and shouldn't be created in the DAG
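// A concrete sketch of the unfold with hypothetical i8 values: x = 0xAA,
// y = 0x55, m = 0xF0. Then ((x ^ y) & m) ^ y == ((0xFF & 0xF0) ^ 0x55)
// == 0xA5, and (x & m) | (y & ~m) == 0xA0 | 0x05 == 0xA5; in both forms
// the high nibble is selected from x and the low nibble from y.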
6324 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6325 assert(N->getOpcode() == ISD::XOR);
6327 // Don't touch 'not' (i.e. where y = -1).
6328 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6329 return SDValue();
6331 EVT VT = N->getValueType(0);
6333 // There are 3 commutable operators in the pattern,
6334 // so we have to deal with 8 possible variants of the basic pattern.
6335 SDValue X, Y, M;
6336 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6337 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6338 return false;
6339 SDValue Xor = And.getOperand(XorIdx);
6340 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6341 return false;
6342 SDValue Xor0 = Xor.getOperand(0);
6343 SDValue Xor1 = Xor.getOperand(1);
6344 // Don't touch 'not' (i.e. where y = -1).
6345 if (isAllOnesOrAllOnesSplat(Xor1))
6346 return false;
6347 if (Other == Xor0)
6348 std::swap(Xor0, Xor1);
6349 if (Other != Xor1)
6350 return false;
6351 X = Xor0;
6352 Y = Xor1;
6353 M = And.getOperand(XorIdx ? 0 : 1);
6354 return true;
6357 SDValue N0 = N->getOperand(0);
6358 SDValue N1 = N->getOperand(1);
6359 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6360 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6361 return SDValue();
6363 // Don't do anything if the mask is constant. This should not be reachable.
6364 // InstCombine should have already unfolded this pattern, and DAGCombiner
6365 // probably shouldn't produce it either.
6366 if (isa<ConstantSDNode>(M.getNode()))
6367 return SDValue();
6369 // We can transform if the target has AndNot
6370 if (!TLI.hasAndNot(M))
6371 return SDValue();
6373 SDLoc DL(N);
6375 // If Y is a constant, check that 'andn' works with immediates.
6376 if (!TLI.hasAndNot(Y)) {
6377 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6378 // If not, we need to do a bit more work to make sure andn is still used.
6379 SDValue NotX = DAG.getNOT(DL, X, VT);
6380 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6381 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6382 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6383 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6386 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6387 SDValue NotM = DAG.getNOT(DL, M, VT);
6388 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6390 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6393 SDValue DAGCombiner::visitXOR(SDNode *N) {
6394 SDValue N0 = N->getOperand(0);
6395 SDValue N1 = N->getOperand(1);
6396 EVT VT = N0.getValueType();
6398 // fold vector ops
6399 if (VT.isVector()) {
6400 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6401 return FoldedVOp;
6403 // fold (xor x, 0) -> x, vector edition
6404 if (ISD::isBuildVectorAllZeros(N0.getNode()))
6405 return N1;
6406 if (ISD::isBuildVectorAllZeros(N1.getNode()))
6407 return N0;
6410 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6411 SDLoc DL(N);
6412 if (N0.isUndef() && N1.isUndef())
6413 return DAG.getConstant(0, DL, VT);
6414 // fold (xor x, undef) -> undef
6415 if (N0.isUndef())
6416 return N0;
6417 if (N1.isUndef())
6418 return N1;
6419 // fold (xor c1, c2) -> c1^c2
6420 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6421 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6422 if (N0C && N1C)
6423 return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6424 // canonicalize constant to RHS
6425 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6426 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6427 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6428 // fold (xor x, 0) -> x
6429 if (isNullConstant(N1))
6430 return N0;
6432 if (SDValue NewSel = foldBinOpIntoSelect(N))
6433 return NewSel;
6435 // reassociate xor
6436 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6437 return RXOR;
6439 // fold !(x cc y) -> (x !cc y)
6440 unsigned N0Opcode = N0.getOpcode();
6441 SDValue LHS, RHS, CC;
6442 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6443 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6444 LHS.getValueType().isInteger());
6445 if (!LegalOperations ||
6446 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6447 switch (N0Opcode) {
6448 default:
6449 llvm_unreachable("Unhandled SetCC Equivalent!");
6450 case ISD::SETCC:
6451 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6452 case ISD::SELECT_CC:
6453 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6454 N0.getOperand(3), NotCC);
6459 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6460 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6461 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6462 SDValue V = N0.getOperand(0);
6463 SDLoc DL0(N0);
6464 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6465 DAG.getConstant(1, DL0, V.getValueType()));
6466 AddToWorklist(V.getNode());
6467 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6470 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6471 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6472 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6473 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6474 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6475 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6476 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6477 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6478 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6479 return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6482 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6483 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6484 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6485 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6486 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6487 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6488 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6489 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6490 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6491 return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6494 // fold (xor (and x, y), y) -> (and (not x), y)
6495 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6496 SDValue X = N0.getOperand(0);
6497 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6498 AddToWorklist(NotX.getNode());
6499 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6502 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6503 ConstantSDNode *XorC = isConstOrConstSplat(N1);
6504 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6505 unsigned BitWidth = VT.getScalarSizeInBits();
6506 if (XorC && ShiftC) {
6507 // Don't crash on an oversized shift. We cannot guarantee that a bogus
6508 // shift has been simplified to undef.
6509 uint64_t ShiftAmt = ShiftC->getLimitedValue();
6510 if (ShiftAmt < BitWidth) {
6511 APInt Ones = APInt::getAllOnesValue(BitWidth);
6512 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6513 if (XorC->getAPIntValue() == Ones) {
6514 // If the xor constant is a shifted -1, do a 'not' before the shift:
6515 // xor (X << ShiftC), XorC --> (not X) << ShiftC
6516 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
6517 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6518 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6524 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
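// A quick check with a hypothetical i8 value X = -5 (0xFB):
// Y = sra(X, 7) = -1 (0xFF); add(X, Y) = -6 (0xFA);
// xor(0xFA, 0xFF) = 0x05, which is abs(-5) as expected.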
6525 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6526 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
6527 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
6528 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6529 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6530 SDValue S0 = S.getOperand(0);
6531 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6532 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6533 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6534 if (C->getAPIntValue() == (OpSizeInBits - 1))
6535 return DAG.getNode(ISD::ABS, DL, VT, S0);
6540 // fold (xor x, x) -> 0
6541 if (N0 == N1)
6542 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6544 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6545 // Here is a concrete example of this equivalence:
6546 // i16 x == 14
6547 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
6548 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6550 // =>
6552 // i16 ~1 == 0b1111111111111110
6553 // i16 rol(~1, 14) == 0b1011111111111111
6555 // Some additional tips to help conceptualize this transform:
6556 // - Try to see the operation as placing a single zero in a value of all ones.
6557 // - There exists no value for x which would allow the result to contain zero.
6558 // - Values of x larger than the bitwidth are undefined and do not require a
6559 // consistent result.
6560 // - Pushing the zero left requires shifting one-bits in from the right.
6561 // A rotate left of ~1 is a nice way of achieving the desired result.
6562 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
6563 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6564 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6565 N0.getOperand(1));
6568 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
6569 if (N0Opcode == N1.getOpcode())
6570 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6571 return V;
6573 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
6574 if (SDValue MM = unfoldMaskedMerge(N))
6575 return MM;
6577 // Simplify the expression using non-local knowledge.
6578 if (SimplifyDemandedBits(SDValue(N, 0)))
6579 return SDValue(N, 0);
6581 return SDValue();
6584 /// Handle transforms common to the three shifts, when the shift amount is a
6585 /// constant.
6586 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6587 // Do not turn a 'not' into a regular xor.
6588 if (isBitwiseNot(N->getOperand(0)))
6589 return SDValue();
6591 SDNode *LHS = N->getOperand(0).getNode();
6592 if (!LHS->hasOneUse()) return SDValue();
6594 // We want to pull some binops through shifts, so that we have (and (shift))
6595 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
6596 // thing happens with address calculations, so it's important to canonicalize
6597 // it.
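// As an illustrative sketch (hypothetical operands): (shl (add X, 5), 3),
// where X is itself a shift by a constant, becomes
// (add (shl X, 3), 5 << 3), exposing the shift-of-shift and the shifted
// constant 40 to further folds.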
6598 bool HighBitSet = false; // Can we transform this if the high bit is set?
6600 switch (LHS->getOpcode()) {
6601 default: return SDValue();
6602 case ISD::OR:
6603 case ISD::XOR:
6604 HighBitSet = false; // We can only transform sra if the high bit is clear.
6605 break;
6606 case ISD::AND:
6607 HighBitSet = true; // We can only transform sra if the high bit is set.
6608 break;
6609 case ISD::ADD:
6610 if (N->getOpcode() != ISD::SHL)
6611 return SDValue(); // only shl(add) not sr[al](add).
6612 HighBitSet = false; // We can only transform sra if the high bit is clear.
6613 break;
6616 // We require the RHS of the binop to be a constant and not opaque as well.
6617 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6618 if (!BinOpCst) return SDValue();
6620 // FIXME: disable this unless the input to the binop is a shift by a constant
6621 // or is a copy/select. Enable this in other cases once we figure out when it is exactly profitable.
6622 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6623 bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6624 BinOpLHSVal->getOpcode() == ISD::SRA ||
6625 BinOpLHSVal->getOpcode() == ISD::SRL;
6626 bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6627 BinOpLHSVal->getOpcode() == ISD::SELECT;
6629 if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6630 !isCopyOrSelect)
6631 return SDValue();
6633 if (isCopyOrSelect && N->hasOneUse())
6634 return SDValue();
6636 EVT VT = N->getValueType(0);
6638 // If this is a signed shift right, and the high bit is modified by the
6639 // logical operation, do not perform the transformation. The HighBitSet
6640 // boolean indicates the value of the high bit of the constant which would
6641 // cause it to be modified for this operation.
6642 if (N->getOpcode() == ISD::SRA) {
6643 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6644 if (BinOpRHSSignSet != HighBitSet)
6645 return SDValue();
6648 if (!TLI.isDesirableToCommuteWithShift(N, Level))
6649 return SDValue();
6651 // Fold the constants, shifting the binop RHS by the shift amount.
6652 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6653 N->getValueType(0),
6654 LHS->getOperand(1), N->getOperand(1));
6655 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6657 // Create the new shift.
6658 SDValue NewShift = DAG.getNode(N->getOpcode(),
6659 SDLoc(LHS->getOperand(0)),
6660 VT, LHS->getOperand(0), N->getOperand(1));
6662 // Create the new binop.
6663 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6666 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6667 assert(N->getOpcode() == ISD::TRUNCATE);
6668 assert(N->getOperand(0).getOpcode() == ISD::AND);
6670 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6671 EVT TruncVT = N->getValueType(0);
6672 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
6673 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
6674 SDValue N01 = N->getOperand(0).getOperand(1);
6675 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6676 SDLoc DL(N);
6677 SDValue N00 = N->getOperand(0).getOperand(0);
6678 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6679 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6680 AddToWorklist(Trunc00.getNode());
6681 AddToWorklist(Trunc01.getNode());
6682 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6686 return SDValue();
6689 SDValue DAGCombiner::visitRotate(SDNode *N) {
6690 SDLoc dl(N);
6691 SDValue N0 = N->getOperand(0);
6692 SDValue N1 = N->getOperand(1);
6693 EVT VT = N->getValueType(0);
6694 unsigned Bitsize = VT.getScalarSizeInBits();
6696 // fold (rot x, 0) -> x
6697 if (isNullOrNullSplat(N1))
6698 return N0;
6700 // fold (rot x, c) -> x iff (c % BitSize) == 0
6701 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
6702 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
6703 if (DAG.MaskedValueIsZero(N1, ModuloMask))
6704 return N0;
6707 // fold (rot x, c) -> (rot x, c % BitSize)
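// E.g. with a hypothetical i8 rotate: (rotl x, 11) -> (rotl x, 3), since
// rotating by a multiple of the bitwidth is the identity.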
6708 if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6709 if (Cst->getAPIntValue().uge(Bitsize)) {
6710 uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6711 return DAG.getNode(N->getOpcode(), dl, VT, N0,
6712 DAG.getConstant(RotAmt, dl, N1.getValueType()));
6716 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6717 if (N1.getOpcode() == ISD::TRUNCATE &&
6718 N1.getOperand(0).getOpcode() == ISD::AND) {
6719 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6720 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6723 unsigned NextOp = N0.getOpcode();
6724 // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
6725 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6726 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6727 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6728 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6729 EVT ShiftVT = C1->getValueType(0);
6730 bool SameSide = (N->getOpcode() == NextOp);
6731 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6732 if (SDValue CombinedShift =
6733 DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6734 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6735 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6736 ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6737 BitsizeC.getNode());
6738 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6739 CombinedShiftNorm);
6743 return SDValue();
6746 SDValue DAGCombiner::visitSHL(SDNode *N) {
6747 SDValue N0 = N->getOperand(0);
6748 SDValue N1 = N->getOperand(1);
6749 if (SDValue V = DAG.simplifyShift(N0, N1))
6750 return V;
6752 EVT VT = N0.getValueType();
6753 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6755 // fold vector ops
6756 if (VT.isVector()) {
6757 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6758 return FoldedVOp;
6760 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6761 // If setcc produces all-one true value then:
6762 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6763 if (N1CV && N1CV->isConstant()) {
6764 if (N0.getOpcode() == ISD::AND) {
6765 SDValue N00 = N0->getOperand(0);
6766 SDValue N01 = N0->getOperand(1);
6767 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6769 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6770 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6771 TargetLowering::ZeroOrNegativeOneBooleanContent) {
6772 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6773 N01CV, N1CV))
6774 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6780 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6782 // fold (shl c1, c2) -> c1<<c2
6783 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6784 if (N0C && N1C && !N1C->isOpaque())
6785 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6787 if (SDValue NewSel = foldBinOpIntoSelect(N))
6788 return NewSel;
6790 // if (shl x, c) is known to be zero, return 0
6791 if (DAG.MaskedValueIsZero(SDValue(N, 0),
6792 APInt::getAllOnesValue(OpSizeInBits)))
6793 return DAG.getConstant(0, SDLoc(N), VT);
6794 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6795 if (N1.getOpcode() == ISD::TRUNCATE &&
6796 N1.getOperand(0).getOpcode() == ISD::AND) {
6797 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6798 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6801 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6802 return SDValue(N, 0);
6804 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
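// Illustrative sketches with hypothetical i8 shifts: (shl (shl x, 3), 2)
// -> (shl x, 5), while (shl (shl x, 6), 4) -> 0 because the combined
// amount (10) reaches or exceeds the bitwidth.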
6805 if (N0.getOpcode() == ISD::SHL) {
6806 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6807 ConstantSDNode *RHS) {
6808 APInt c1 = LHS->getAPIntValue();
6809 APInt c2 = RHS->getAPIntValue();
6810 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6811 return (c1 + c2).uge(OpSizeInBits);
6813 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6814 return DAG.getConstant(0, SDLoc(N), VT);
6816 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6817 ConstantSDNode *RHS) {
6818 APInt c1 = LHS->getAPIntValue();
6819 APInt c2 = RHS->getAPIntValue();
6820 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6821 return (c1 + c2).ult(OpSizeInBits);
6823 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6824 SDLoc DL(N);
6825 EVT ShiftVT = N1.getValueType();
6826 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6827 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6831 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6832 // For this to be valid, the second form must not preserve any of the bits
6833 // that are shifted out by the inner shift in the first form. This means
6834 // the outer shift size must be >= the number of bits added by the ext.
6835 // As a corollary, we don't care what kind of ext it is.
6836 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6837 N0.getOpcode() == ISD::ANY_EXTEND ||
6838 N0.getOpcode() == ISD::SIGN_EXTEND) &&
6839 N0.getOperand(0).getOpcode() == ISD::SHL) {
6840 SDValue N0Op0 = N0.getOperand(0);
6841 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6842 APInt c1 = N0Op0C1->getAPIntValue();
6843 APInt c2 = N1C->getAPIntValue();
6844 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6846 EVT InnerShiftVT = N0Op0.getValueType();
6847 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6848 if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6849 SDLoc DL(N0);
6850 APInt Sum = c1 + c2;
6851 if (Sum.uge(OpSizeInBits))
6852 return DAG.getConstant(0, DL, VT);
6854 return DAG.getNode(
6855 ISD::SHL, DL, VT,
6856 DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6857 DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6862 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6863 // Only fold this if the inner zext has no other uses to avoid increasing
6864 // the total number of instructions.
6865 if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6866 N0.getOperand(0).getOpcode() == ISD::SRL) {
6867 SDValue N0Op0 = N0.getOperand(0);
6868 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6869 if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6870 uint64_t c1 = N0Op0C1->getZExtValue();
6871 uint64_t c2 = N1C->getZExtValue();
6872 if (c1 == c2) {
6873 SDValue NewOp0 = N0.getOperand(0);
6874 EVT CountVT = NewOp0.getOperand(1).getValueType();
6875 SDLoc DL(N);
6876 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6877 NewOp0,
6878 DAG.getConstant(c2, DL, CountVT));
6879 AddToWorklist(NewSHL.getNode());
6880 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6886 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
6887 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
6888 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6889 N0->getFlags().hasExact()) {
6890 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6891 uint64_t C1 = N0C1->getZExtValue();
6892 uint64_t C2 = N1C->getZExtValue();
6893 SDLoc DL(N);
6894 if (C1 <= C2)
6895 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6896 DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6897 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6898 DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6902 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
6903 // (and (srl x, (sub c1, c2)), MASK)
6904 // Only fold this if the inner shift has no other uses -- if it does, folding
6905 // this will increase the total number of instructions.
6906 // TODO - drop hasOneUse requirement if c1 == c2?
6907 // TODO - support non-uniform vector shift amounts.
6908 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
6909 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
6910 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6911 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
6912 uint64_t c1 = N0C1->getZExtValue();
6913 uint64_t c2 = N1C->getZExtValue();
6914 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6915 SDValue Shift;
6916 if (c2 > c1) {
6917 Mask <<= c2 - c1;
6918 SDLoc DL(N);
6919 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6920 DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6921 } else {
6922 Mask.lshrInPlace(c1 - c2);
6923 SDLoc DL(N);
6924 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6925 DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6927 SDLoc DL(N0);
6928 return DAG.getNode(ISD::AND, DL, VT, Shift,
6929 DAG.getConstant(Mask, DL, VT));
6934 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6935 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6936 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6937 SDLoc DL(N);
6938 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6939 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6940 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6943 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6944 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6945 // Variant of version done on multiply, except mul by a power of 2 is turned
6946 // into a shift.
6947 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6948 N0.getNode()->hasOneUse() &&
6949 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6950 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
6951 TLI.isDesirableToCommuteWithShift(N, Level)) {
6952 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6953 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6954 AddToWorklist(Shl0.getNode());
6955 AddToWorklist(Shl1.getNode());
6956 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6959 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
6960 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6961 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6962 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6963 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6964 if (isConstantOrConstantVector(Shl))
6965 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6968 if (N1C && !N1C->isOpaque())
6969 if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6970 return NewSHL;
6972 return SDValue();
6975 SDValue DAGCombiner::visitSRA(SDNode *N) {
6976 SDValue N0 = N->getOperand(0);
6977 SDValue N1 = N->getOperand(1);
6978 if (SDValue V = DAG.simplifyShift(N0, N1))
6979 return V;
6981 EVT VT = N0.getValueType();
6982 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6984 // Arithmetic shifting an all-sign-bit value is a no-op.
6985 // fold (sra 0, x) -> 0
6986 // fold (sra -1, x) -> -1
6987 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
6988 return N0;
6990 // fold vector ops
6991 if (VT.isVector())
6992 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6993 return FoldedVOp;
6995 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6997 // fold (sra c1, c2) -> c1 >>s c2
6998 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6999 if (N0C && N1C && !N1C->isOpaque())
7000 return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
7002 if (SDValue NewSel = foldBinOpIntoSelect(N))
7003 return NewSel;
7005 // fold (sra (shl x, c1), c1) -> sext_inreg, for some c1, if the target
7006 // supports sext_inreg.
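// For instance (a hypothetical i32 example): (sra (shl x, 24), 24)
// sign-extends the low 8 bits of x, i.e. a sext_inreg of x from i8.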
7007 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
7008 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7009 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7010 if (VT.isVector())
7011 ExtVT = EVT::getVectorVT(*DAG.getContext(),
7012 ExtVT, VT.getVectorNumElements());
7013 if ((!LegalOperations ||
7014 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
7015 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7016 N0.getOperand(0), DAG.getValueType(ExtVT));
7019 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7020 // clamp (add c1, c2) to max shift.
7021 if (N0.getOpcode() == ISD::SRA) {
7022 SDLoc DL(N);
7023 EVT ShiftVT = N1.getValueType();
7024 EVT ShiftSVT = ShiftVT.getScalarType();
7025 SmallVector<SDValue, 16> ShiftValues;
7027 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7028 APInt c1 = LHS->getAPIntValue();
7029 APInt c2 = RHS->getAPIntValue();
7030 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7031 APInt Sum = c1 + c2;
7032 unsigned ShiftSum =
7033 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7034 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7035 return true;
7037 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7038 SDValue ShiftValue;
7039 if (VT.isVector())
7040 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7041 else
7042 ShiftValue = ShiftValues[0];
7043 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7047 // fold (sra (shl X, m), (sub result_size, n))
7048 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
7049 // result_size - n != m.
7050 // If truncate is free for the target, sext(shl) is likely to result in
7051 // better code.
7052 if (N0.getOpcode() == ISD::SHL && N1C) {
7053 // Get the two constants of the shifts, CN0 = m, CN = n.
7054 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7055 if (N01C) {
7056 LLVMContext &Ctx = *DAG.getContext();
7057 // Determine what the truncate's result bitsize and type would be.
7058 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7060 if (VT.isVector())
7061 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7063 // Determine the residual right-shift amount.
7064 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7066 // If the shift is not a no-op (in which case this should be just a sign
7067 // extend already), the truncated-to type is legal, sign_extend is legal
7068 // on that type, and the truncate to that type is both legal and free,
7069 // perform the transform.
7070 if ((ShiftAmt > 0) &&
7071 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
7072 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
7073 TLI.isTruncateFree(VT, TruncVT)) {
7074 SDLoc DL(N);
7075 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7076 getShiftAmountTy(N0.getOperand(0).getValueType()));
7077 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7078 N0.getOperand(0), Amt);
7079 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7080 Shift);
7081 return DAG.getNode(ISD::SIGN_EXTEND, DL,
7082 N->getValueType(0), Trunc);
7087 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7088 if (N1.getOpcode() == ISD::TRUNCATE &&
7089 N1.getOperand(0).getOpcode() == ISD::AND) {
7090 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7091 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7094 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7095 // if c1 is equal to the number of bits the trunc removes
7096 if (N0.getOpcode() == ISD::TRUNCATE &&
7097 (N0.getOperand(0).getOpcode() == ISD::SRL ||
7098 N0.getOperand(0).getOpcode() == ISD::SRA) &&
7099 N0.getOperand(0).hasOneUse() &&
7100 N0.getOperand(0).getOperand(1).hasOneUse() &&
7101 N1C) {
7102 SDValue N0Op0 = N0.getOperand(0);
7103 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7104 unsigned LargeShiftVal = LargeShift->getZExtValue();
7105 EVT LargeVT = N0Op0.getValueType();
7107 if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
7108 SDLoc DL(N);
7109 SDValue Amt =
7110 DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
7111 getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
7112 SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
7113 N0Op0.getOperand(0), Amt);
7114 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7119 // Simplify, based on bits shifted out of the LHS.
7120 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7121 return SDValue(N, 0);
7123 // If the sign bit is known to be zero, switch this to a SRL.
7124 if (DAG.SignBitIsZero(N0))
7125 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7127 if (N1C && !N1C->isOpaque())
7128 if (SDValue NewSRA = visitShiftByConstant(N, N1C))
7129 return NewSRA;
7131 return SDValue();
7134 SDValue DAGCombiner::visitSRL(SDNode *N) {
7135 SDValue N0 = N->getOperand(0);
7136 SDValue N1 = N->getOperand(1);
7137 if (SDValue V = DAG.simplifyShift(N0, N1))
7138 return V;
7140 EVT VT = N0.getValueType();
7141 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7143 // fold vector ops
7144 if (VT.isVector())
7145 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7146 return FoldedVOp;
7148 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7150 // fold (srl c1, c2) -> c1 >>u c2
7151 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7152 if (N0C && N1C && !N1C->isOpaque())
7153 return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
7155 if (SDValue NewSel = foldBinOpIntoSelect(N))
7156 return NewSel;
7158 // if (srl x, c) is known to be zero, return 0
7159 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
7160 APInt::getAllOnesValue(OpSizeInBits)))
7161 return DAG.getConstant(0, SDLoc(N), VT);
7163 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
7164 if (N0.getOpcode() == ISD::SRL) {
7165 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7166 ConstantSDNode *RHS) {
7167 APInt c1 = LHS->getAPIntValue();
7168 APInt c2 = RHS->getAPIntValue();
7169 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7170 return (c1 + c2).uge(OpSizeInBits);
7172 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7173 return DAG.getConstant(0, SDLoc(N), VT);
7175 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7176 ConstantSDNode *RHS) {
7177 APInt c1 = LHS->getAPIntValue();
7178 APInt c2 = RHS->getAPIntValue();
7179 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7180 return (c1 + c2).ult(OpSizeInBits);
7182 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7183 SDLoc DL(N);
7184 EVT ShiftVT = N1.getValueType();
7185 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7186 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7190 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
7191 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7192 N0.getOperand(0).getOpcode() == ISD::SRL) {
7193 if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7194 uint64_t c1 = N001C->getZExtValue();
7195 uint64_t c2 = N1C->getZExtValue();
7196 EVT InnerShiftVT = N0.getOperand(0).getValueType();
7197 EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7198 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
7199 // This is only valid if OpSizeInBits + c1 equals the size of the inner shift.
7200 if (c1 + OpSizeInBits == InnerShiftSize) {
7201 SDLoc DL(N0);
7202 if (c1 + c2 >= InnerShiftSize)
7203 return DAG.getConstant(0, DL, VT);
7204 return DAG.getNode(ISD::TRUNCATE, DL, VT,
7205 DAG.getNode(ISD::SRL, DL, InnerShiftVT,
7206 N0.getOperand(0).getOperand(0),
7207 DAG.getConstant(c1 + c2, DL,
7208 ShiftCountVT)));
7213 // fold (srl (shl x, c), c) -> (and x, cst2)
7214 // TODO - (srl (shl x, c1), c2).
7215 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
7216 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
7217 SDLoc DL(N);
7218 SDValue Mask =
7219 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
7220 AddToWorklist(Mask.getNode());
7221 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
7224 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
7225 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7226 // Shifting in all undef bits?
7227 EVT SmallVT = N0.getOperand(0).getValueType();
7228 unsigned BitSize = SmallVT.getScalarSizeInBits();
7229 if (N1C->getZExtValue() >= BitSize)
7230 return DAG.getUNDEF(VT);
7232 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
7233 uint64_t ShiftAmt = N1C->getZExtValue();
7234 SDLoc DL0(N0);
7235 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
7236 N0.getOperand(0),
7237 DAG.getConstant(ShiftAmt, DL0,
7238 getShiftAmountTy(SmallVT)));
7239 AddToWorklist(SmallShift.getNode());
7240 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
7241 SDLoc DL(N);
7242 return DAG.getNode(ISD::AND, DL, VT,
7243 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
7244 DAG.getConstant(Mask, DL, VT));
7248 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
7249 // bit, which is unmodified by sra.
7250 if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
7251 if (N0.getOpcode() == ISD::SRA)
7252 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
7255 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
7256 if (N1C && N0.getOpcode() == ISD::CTLZ &&
7257 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
7258 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
7260 // If any of the input bits are KnownOne, then the input couldn't be all
7261 // zeros, thus the result of the srl will always be zero.
7262 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
7264 // If all of the bits input to the ctlz node are known to be zero, then
7265 // the result of the ctlz is the bitwidth and the result of the shift is one.
7266 APInt UnknownBits = ~Known.Zero;
7267 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
7269 // Otherwise, check to see if there is exactly one bit input to the ctlz.
7270 if (UnknownBits.isPowerOf2()) {
7271 // Okay, we know that only the single bit specified by UnknownBits
7272 // could be set on input to the CTLZ node. If this bit is set, the SRL
7273 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
7274 // to an SRL/XOR pair, which is likely to simplify more.
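// A hypothetical i32 example: if only bit 4 of x can be set, then
// (srl (ctlz x), 5) yields 1 when x == 0 (ctlz == 32) and 0 when
// x == 0x10 (ctlz == 27), which is exactly (xor (srl x, 4), 1).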
7275 unsigned ShAmt = UnknownBits.countTrailingZeros();
7276 SDValue Op = N0.getOperand(0);
7278 if (ShAmt) {
7279 SDLoc DL(N0);
7280 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7281 DAG.getConstant(ShAmt, DL,
7282 getShiftAmountTy(Op.getValueType())));
7283 AddToWorklist(Op.getNode());
7286 SDLoc DL(N);
7287 return DAG.getNode(ISD::XOR, DL, VT,
7288 Op, DAG.getConstant(1, DL, VT));
7292 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7293 if (N1.getOpcode() == ISD::TRUNCATE &&
7294 N1.getOperand(0).getOpcode() == ISD::AND) {
7295 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7296 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7299 // fold operands of srl based on knowledge that the low bits are not
7300 // demanded.
7301 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7302 return SDValue(N, 0);
7304 if (N1C && !N1C->isOpaque())
7305 if (SDValue NewSRL = visitShiftByConstant(N, N1C))
7306 return NewSRL;
7308 // Attempt to convert a srl of a load into a narrower zero-extending load.
7309 if (SDValue NarrowLoad = ReduceLoadWidth(N))
7310 return NarrowLoad;
7312 // Here is a common situation. We want to optimize:
7314 // %a = ...
7315 // %b = and i32 %a, 2
7316 // %c = srl i32 %b, 1
7317 // brcond i32 %c ...
7319 // into
7321 // %a = ...
7322 // %b = and %a, 2
7323 // %c = setcc eq %b, 0
7324 // brcond %c ...
7326 // However, after the source operand of the SRL is optimized into AND, the SRL
7327 // itself may not be optimized further. Look for it and add the BRCOND to
7328 // the worklist.
7329 if (N->hasOneUse()) {
7330 SDNode *Use = *N->use_begin();
7331 if (Use->getOpcode() == ISD::BRCOND)
7332 AddToWorklist(Use);
7333 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
7334 // Also look past the truncate.
7335 Use = *Use->use_begin();
7336 if (Use->getOpcode() == ISD::BRCOND)
7337 AddToWorklist(Use);
7341 return SDValue();
7344 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7345 EVT VT = N->getValueType(0);
7346 SDValue N0 = N->getOperand(0);
7347 SDValue N1 = N->getOperand(1);
7348 SDValue N2 = N->getOperand(2);
7349 bool IsFSHL = N->getOpcode() == ISD::FSHL;
7350 unsigned BitWidth = VT.getScalarSizeInBits();
7352 // fold (fshl N0, N1, 0) -> N0
7353 // fold (fshr N0, N1, 0) -> N1
7354 if (isPowerOf2_32(BitWidth))
7355 if (DAG.MaskedValueIsZero(
7356 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7357 return IsFSHL ? N0 : N1;
7359 auto IsUndefOrZero = [](SDValue V) {
7360 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7363 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7364 EVT ShAmtTy = N2.getValueType();
7366 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7367 if (Cst->getAPIntValue().uge(BitWidth)) {
7368 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7369 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7370 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7373 unsigned ShAmt = Cst->getZExtValue();
7374 if (ShAmt == 0)
7375 return IsFSHL ? N0 : N1;
7377 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7378 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7379 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7380 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
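// A quick sanity check with hypothetical i8 operands: fshl(x, 0, 3) ==
// (x << 3) | (0 >> 5) == shl(x, 3), and fshr(0, y, 3) ==
// (0 << 5) | (y >> 3) == srl(y, 3).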
7381 if (IsUndefOrZero(N0))
7382 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7383 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7384 SDLoc(N), ShAmtTy));
7385 if (IsUndefOrZero(N1))
7386 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7387 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7388 SDLoc(N), ShAmtTy));
7391 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7392 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7393 // iff we know the shift amount is in range.
7394 // TODO: when is it worth doing SUB(BW, N2) as well?
7395 if (isPowerOf2_32(BitWidth)) {
7396 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7397 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7398 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7399 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7400 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7403 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7404 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7405 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
7406 // shift is legal as well, we might be better off avoiding the non-constant (BW - N2).
7407 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7408 if (N0 == N1 && hasOperation(RotOpc, VT))
7409 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7411 // Simplify, based on bits shifted out of N0/N1.
7412 if (SimplifyDemandedBits(SDValue(N, 0)))
7413 return SDValue(N, 0);
7415 return SDValue();
7418 SDValue DAGCombiner::visitABS(SDNode *N) {
7419 SDValue N0 = N->getOperand(0);
7420 EVT VT = N->getValueType(0);
7422 // fold (abs c1) -> c2
7423 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7424 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7425 // fold (abs (abs x)) -> (abs x)
7426 if (N0.getOpcode() == ISD::ABS)
7427 return N0;
7428 // fold (abs x) -> x iff x is known non-negative
7429 if (DAG.SignBitIsZero(N0))
7430 return N0;
7431 return SDValue();
7434 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7435 SDValue N0 = N->getOperand(0);
7436 EVT VT = N->getValueType(0);
7438 // fold (bswap c1) -> c2
7439 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7440 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7441 // fold (bswap (bswap x)) -> x
7442 if (N0.getOpcode() == ISD::BSWAP)
7443 return N0->getOperand(0);
7444 return SDValue();
7447 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7448 SDValue N0 = N->getOperand(0);
7449 EVT VT = N->getValueType(0);
7451 // fold (bitreverse c1) -> c2
7452 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7453 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7454 // fold (bitreverse (bitreverse x)) -> x
7455 if (N0.getOpcode() == ISD::BITREVERSE)
7456 return N0.getOperand(0);
7457 return SDValue();
7460 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7461 SDValue N0 = N->getOperand(0);
7462 EVT VT = N->getValueType(0);
7464 // fold (ctlz c1) -> c2
7465 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7466 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7468 // If the value is known never to be zero, switch to the undef version.
7469 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7470 if (DAG.isKnownNeverZero(N0))
7471 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7474 return SDValue();
7477 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7478 SDValue N0 = N->getOperand(0);
7479 EVT VT = N->getValueType(0);
7481 // fold (ctlz_zero_undef c1) -> c2
7482 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7483 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7484 return SDValue();
7487 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
7488 SDValue N0 = N->getOperand(0);
7489 EVT VT = N->getValueType(0);
7491 // fold (cttz c1) -> c2
7492 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7493 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7495 // If the value is known never to be zero, switch to the undef version.
7496 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7497 if (DAG.isKnownNeverZero(N0))
7498 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7501 return SDValue();
7504 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7505 SDValue N0 = N->getOperand(0);
7506 EVT VT = N->getValueType(0);
7508 // fold (cttz_zero_undef c1) -> c2
7509 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7510 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7511 return SDValue();
7514 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7515 SDValue N0 = N->getOperand(0);
7516 EVT VT = N->getValueType(0);
7518 // fold (ctpop c1) -> c2
7519 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7520 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7521 return SDValue();
7524 // FIXME: This should be checking for no signed zeros on individual operands,
7525 // as well as no NaNs.
7526 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
7527 SDValue RHS,
7528 const TargetLowering &TLI) {
7529 const TargetOptions &Options = DAG.getTarget().Options;
7530 EVT VT = LHS.getValueType();
7532 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7533 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
7534 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7537 /// Generate Min/Max node
7538 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7539 SDValue RHS, SDValue True, SDValue False,
7540 ISD::CondCode CC, const TargetLowering &TLI,
7541 SelectionDAG &DAG) {
7542 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7543 return SDValue();
7545 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7546 switch (CC) {
7547 case ISD::SETOLT:
7548 case ISD::SETOLE:
7549 case ISD::SETLT:
7550 case ISD::SETLE:
7551 case ISD::SETULT:
7552 case ISD::SETULE: {
7553 // Since the operands are already known never to be NaN here, either fminnum
7554 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
7555 // expanded in terms of it.
7556 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7557 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7558 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7560 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7561 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7562 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7563 return SDValue();
7565 case ISD::SETOGT:
7566 case ISD::SETOGE:
7567 case ISD::SETGT:
7568 case ISD::SETGE:
7569 case ISD::SETUGT:
7570 case ISD::SETUGE: {
7571 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
7572 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7573 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7575 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7576 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7577 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7578 return SDValue();
7580 default:
7581 return SDValue();
7585 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7586 SDValue Cond = N->getOperand(0);
7587 SDValue N1 = N->getOperand(1);
7588 SDValue N2 = N->getOperand(2);
7589 EVT VT = N->getValueType(0);
7590 EVT CondVT = Cond.getValueType();
7591 SDLoc DL(N);
7593 if (!VT.isInteger())
7594 return SDValue();
7596 auto *C1 = dyn_cast<ConstantSDNode>(N1);
7597 auto *C2 = dyn_cast<ConstantSDNode>(N2);
7598 if (!C1 || !C2)
7599 return SDValue();
7601 // Only do this before legalization to avoid conflicting with target-specific
7602 // transforms in the other direction (create a select from a zext/sext). There
7603 // is also a target-independent combine here in DAGCombiner in the other
7604 // direction for (select Cond, -1, 0) when the condition is not i1.
7605 if (CondVT == MVT::i1 && !LegalOperations) {
7606 if (C1->isNullValue() && C2->isOne()) {
7607 // select Cond, 0, 1 --> zext (!Cond)
7608 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7609 if (VT != MVT::i1)
7610 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7611 return NotCond;
7613 if (C1->isNullValue() && C2->isAllOnesValue()) {
7614 // select Cond, 0, -1 --> sext (!Cond)
7615 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7616 if (VT != MVT::i1)
7617 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7618 return NotCond;
7620 if (C1->isOne() && C2->isNullValue()) {
7621 // select Cond, 1, 0 --> zext (Cond)
7622 if (VT != MVT::i1)
7623 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7624 return Cond;
7626 if (C1->isAllOnesValue() && C2->isNullValue()) {
7627 // select Cond, -1, 0 --> sext (Cond)
7628 if (VT != MVT::i1)
7629 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7630 return Cond;
7633 // For any constants that differ by 1, we can transform the select into an
7634 // extend and add. Use a target hook because some targets may prefer to
7635 // transform in the other direction.
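// For example (hypothetical constants): select(Cond, 5, 4) becomes
// add(zext(Cond), 4): a true condition contributes 1 and yields 5, while
// a false condition contributes 0 and yields 4.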
7636 if (TLI.convertSelectOfConstantsToMath(VT)) {
7637 if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7638 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7639 if (VT != MVT::i1)
7640 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7641 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7643 if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7644 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7645 if (VT != MVT::i1)
7646 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7647 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7651 return SDValue();
7654 // fold (select Cond, 0, 1) -> (xor Cond, 1)
7655 // We can't do this reliably if integer-based booleans have different contents
7656 // from floating-point-based booleans. This is because we can't tell whether we
7657 // have an integer-based boolean or a floating-point-based boolean unless we
7658 // can find the SETCC that produced it and inspect its operands. This is
7659 // fairly easy if Cond is the SETCC node, but it can potentially be
7660 // undiscoverable (or not reasonably discoverable). For example, it could be
7661 // in another basic block or it could require searching a complicated
7662 // expression.
7663 if (CondVT.isInteger() &&
7664 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7665 TargetLowering::ZeroOrOneBooleanContent &&
7666 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7667 TargetLowering::ZeroOrOneBooleanContent &&
7668 C1->isNullValue() && C2->isOne()) {
7669 SDValue NotCond =
7670 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7671 if (VT.bitsEq(CondVT))
7672 return NotCond;
7673 return DAG.getZExtOrTrunc(NotCond, DL, VT);
7676 return SDValue();
7679 SDValue DAGCombiner::visitSELECT(SDNode *N) {
7680 SDValue N0 = N->getOperand(0);
7681 SDValue N1 = N->getOperand(1);
7682 SDValue N2 = N->getOperand(2);
7683 EVT VT = N->getValueType(0);
7684 EVT VT0 = N0.getValueType();
7685 SDLoc DL(N);
7687 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
7688 return V;
7690 // fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or X, Y)
7692 if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
7693 return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7695 if (SDValue V = foldSelectOfConstants(N))
7696 return V;
7698 // fold (select C, 0, X) -> (and (not C), X)
7699 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7700 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7701 AddToWorklist(NOTNode.getNode());
7702 return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7704 // fold (select C, X, 1) -> (or (not C), X)
7705 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7706 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7707 AddToWorklist(NOTNode.getNode());
7708 return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7710 // fold (select X, Y, X) -> (and X, Y)
7711 // fold (select X, Y, 0) -> (and X, Y)
7712 if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7713 return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7715 // If we can fold this based on the true/false value, do so.
7716 if (SimplifySelectOps(N, N1, N2))
7717 return SDValue(N, 0); // Don't revisit N.
7719 if (VT0 == MVT::i1) {
7720 // The code in this block deals with the following 2 equivalences:
7721 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7722 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
// The target can specify its preferred form with the
// shouldNormalizeToSelectSequence() callback. However, we always transform
// to the right-hand form if the inner select already exists in the DAG, and
// we always transform to the left-hand form if we know that we can further
// optimize the combination of the conditions.
7728 bool normalizeToSequence =
7729 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7730 // select (and Cond0, Cond1), X, Y
7731 // -> select Cond0, (select Cond1, X, Y), Y
7732 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7733 SDValue Cond0 = N0->getOperand(0);
7734 SDValue Cond1 = N0->getOperand(1);
7735 SDValue InnerSelect =
7736 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7737 if (normalizeToSequence || !InnerSelect.use_empty())
7738 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7739 InnerSelect, N2);
7740 // Cleanup on failure.
7741 if (InnerSelect.use_empty())
7742 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
7744 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7745 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7746 SDValue Cond0 = N0->getOperand(0);
7747 SDValue Cond1 = N0->getOperand(1);
7748 SDValue InnerSelect =
7749 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7750 if (normalizeToSequence || !InnerSelect.use_empty())
7751 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7752 InnerSelect);
7753 // Cleanup on failure.
7754 if (InnerSelect.use_empty())
7755 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
7758 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7759 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7760 SDValue N1_0 = N1->getOperand(0);
7761 SDValue N1_1 = N1->getOperand(1);
7762 SDValue N1_2 = N1->getOperand(2);
7763 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7764 // Create the actual and node if we can generate good code for it.
7765 if (!normalizeToSequence) {
7766 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7767 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7769 // Otherwise see if we can optimize the "and" to a better pattern.
7770 if (SDValue Combined = visitANDLike(N0, N1_0, N))
7771 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7772 N2);
7775 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7776 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7777 SDValue N2_0 = N2->getOperand(0);
7778 SDValue N2_1 = N2->getOperand(1);
7779 SDValue N2_2 = N2->getOperand(2);
7780 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7781 // Create the actual or node if we can generate good code for it.
7782 if (!normalizeToSequence) {
7783 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7784 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7786 // Otherwise see if we can optimize to a better pattern.
7787 if (SDValue Combined = visitORLike(N0, N2_0, N))
7788 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7789 N2_2);
7794 // select (not Cond), N1, N2 -> select Cond, N2, N1
7795 if (SDValue F = extractBooleanFlip(N0, TLI))
7796 return DAG.getSelect(DL, VT, F, N2, N1);
7798 // Fold selects based on a setcc into other things, such as min/max/abs.
7799 if (N0.getOpcode() == ISD::SETCC) {
7800 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
7801 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7803 // select (fcmp lt x, y), x, y -> fminnum x, y
7804 // select (fcmp gt x, y), x, y -> fmaxnum x, y
7806 // This is OK if we don't care what happens if either operand is a NaN.
7807 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
7808 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
7809 CC, TLI, DAG))
7810 return FMinMax;
7812 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
7813 // This is conservatively limited to pre-legal-operations to give targets
7814 // a chance to reverse the transform if they want to do that. Also, it is
7815 // unlikely that the pattern would be formed late, so it's probably not
7816 // worth going through the other checks.
7817 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
7818 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
7819 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
7820 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
7821 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
7822 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
7823 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
7824 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
7826 // The IR equivalent of this transform would have this form:
7827 // %a = add %x, C
7828 // %c = icmp ugt %x, ~C
7829 // %r = select %c, -1, %a
7830 // =>
7831 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
7832 // %u0 = extractvalue %u, 0
7833 // %u1 = extractvalue %u, 1
7834 // %r = select %u1, -1, %u0
7835 SDVTList VTs = DAG.getVTList(VT, VT0);
7836 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
7837 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
7841 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
7842 (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
7843 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
7844 N0.getOperand(2));
7846 return SimplifySelect(DL, N0, N1, N2);
7849 return SDValue();
7852 static
7853 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7854 SDLoc DL(N);
7855 EVT LoVT, HiVT;
7856 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7858 // Split the inputs.
7859 SDValue Lo, Hi, LL, LH, RL, RH;
7860 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7861 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7863 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7864 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7866 return std::make_pair(Lo, Hi);
// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a build_vector of ConstantSDNodes (or
// undefs).
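// Illustrative example (widths invented for exposition):
//   vselect <4 x i1> <1, 1, 0, 0>,
//           (concat_vectors <2 x i32> %a, <2 x i32> %b),
//           (concat_vectors <2 x i32> %c, <2 x i32> %d)
//     --> concat_vectors %a, %d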
7871 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7872 SDLoc DL(N);
7873 SDValue Cond = N->getOperand(0);
7874 SDValue LHS = N->getOperand(1);
7875 SDValue RHS = N->getOperand(2);
7876 EVT VT = N->getValueType(0);
7877 int NumElems = VT.getVectorNumElements();
7878 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7879 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7880 Cond.getOpcode() == ISD::BUILD_VECTOR);
// CONCAT_VECTORS can take an arbitrary number of arguments. We only care
// about binary ones here.
7884 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7885 return SDValue();
// We're sure we have an even number of elements due to the
// concat_vectors we have as arguments to vselect.
// Scan the first half of the build_vector, skipping UNDEF elements, and
// check that every non-undef element in that half is the same node.
7892 ConstantSDNode *BottomHalf = nullptr;
7893 for (int i = 0; i < NumElems / 2; ++i) {
7894 if (Cond->getOperand(i)->isUndef())
7895 continue;
7897 if (BottomHalf == nullptr)
7898 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7899 else if (Cond->getOperand(i).getNode() != BottomHalf)
7900 return SDValue();
7903 // Do the same for the second half of the BuildVector
7904 ConstantSDNode *TopHalf = nullptr;
7905 for (int i = NumElems / 2; i < NumElems; ++i) {
7906 if (Cond->getOperand(i)->isUndef())
7907 continue;
7909 if (TopHalf == nullptr)
7910 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7911 else if (Cond->getOperand(i).getNode() != TopHalf)
7912 return SDValue();
7915 assert(TopHalf && BottomHalf &&
7916 "One half of the selector was all UNDEFs and the other was all the "
7917 "same value. This should have been addressed before this function.");
7918 return DAG.getNode(
7919 ISD::CONCAT_VECTORS, DL, VT,
7920 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7921 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7924 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7925 if (Level >= AfterLegalizeTypes)
7926 return SDValue();
7928 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7929 SDValue Mask = MSC->getMask();
7930 SDValue Data = MSC->getValue();
7931 SDLoc DL(N);
7933 // If the MSCATTER data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
7935 // prevents the type legalizer from unrolling SETCC into scalar comparisons
7936 // and enables future optimizations (e.g. min/max pattern matching on X86).
7937 if (Mask.getOpcode() != ISD::SETCC)
7938 return SDValue();
7940 // Check if any splitting is required.
7941 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7942 TargetLowering::TypeSplitVector)
7943 return SDValue();
7944 SDValue MaskLo, MaskHi;
7945 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7947 EVT LoVT, HiVT;
7948 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7950 SDValue Chain = MSC->getChain();
7952 EVT MemoryVT = MSC->getMemoryVT();
7953 unsigned Alignment = MSC->getOriginalAlignment();
7955 EVT LoMemVT, HiMemVT;
7956 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7958 SDValue DataLo, DataHi;
7959 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7961 SDValue Scale = MSC->getScale();
7962 SDValue BasePtr = MSC->getBasePtr();
7963 SDValue IndexLo, IndexHi;
7964 std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7966 MachineMemOperand *MMO = DAG.getMachineFunction().
7967 getMachineMemOperand(MSC->getPointerInfo(),
7968 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
7969 Alignment, MSC->getAAInfo(), MSC->getRanges());
7971 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7972 SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
7973 DataLo.getValueType(), DL, OpsLo, MMO);
// The order of the scatter operations after the split is well defined: the
// "Hi" part comes after the "Lo" part, so the two operations are chained one
// after the other.
7978 SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7979 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7980 DL, OpsHi, MMO);
7983 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7984 if (Level >= AfterLegalizeTypes)
7985 return SDValue();
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); // Cannot fail: N is MSTORE.
7988 SDValue Mask = MST->getMask();
7989 SDValue Data = MST->getValue();
7990 EVT VT = Data.getValueType();
7991 SDLoc DL(N);
7993 // If the MSTORE data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
7995 // prevents the type legalizer from unrolling SETCC into scalar comparisons
7996 // and enables future optimizations (e.g. min/max pattern matching on X86).
7997 if (Mask.getOpcode() == ISD::SETCC) {
7998 // Check if any splitting is required.
7999 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8000 TargetLowering::TypeSplitVector)
8001 return SDValue();
8003 SDValue MaskLo, MaskHi, Lo, Hi;
8004 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8006 SDValue Chain = MST->getChain();
8007 SDValue Ptr = MST->getBasePtr();
8009 EVT MemoryVT = MST->getMemoryVT();
8010 unsigned Alignment = MST->getOriginalAlignment();
// If the alignment is equal to the vector size in bytes, use half of it for
// the second half of the split store (e.g. a 32-byte-aligned v8i32 store
// splits into two halves, and the upper half is only 16-byte aligned).
8014 unsigned SecondHalfAlignment =
8015 (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
8017 EVT LoMemVT, HiMemVT;
8018 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8020 SDValue DataLo, DataHi;
8021 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8023 MachineMemOperand *MMO = DAG.getMachineFunction().
8024 getMachineMemOperand(MST->getPointerInfo(),
8025 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
8026 Alignment, MST->getAAInfo(), MST->getRanges());
8028 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
8029 MST->isTruncatingStore(),
8030 MST->isCompressingStore());
8032 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8033 MST->isCompressingStore());
8034 unsigned HiOffset = LoMemVT.getStoreSize();
8036 MMO = DAG.getMachineFunction().getMachineMemOperand(
8037 MST->getPointerInfo().getWithOffset(HiOffset),
8038 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
8039 MST->getAAInfo(), MST->getRanges());
8041 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
8042 MST->isTruncatingStore(),
8043 MST->isCompressingStore());
8045 AddToWorklist(Lo.getNode());
8046 AddToWorklist(Hi.getNode());
8048 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8050 return SDValue();
8053 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8054 if (Level >= AfterLegalizeTypes)
8055 return SDValue();
8057 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8058 SDValue Mask = MGT->getMask();
8059 SDLoc DL(N);
8061 // If the MGATHER result requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
8063 // prevents the type legalizer from unrolling SETCC into scalar comparisons
8064 // and enables future optimizations (e.g. min/max pattern matching on X86).
8066 if (Mask.getOpcode() != ISD::SETCC)
8067 return SDValue();
8069 EVT VT = N->getValueType(0);
8071 // Check if any splitting is required.
8072 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8073 TargetLowering::TypeSplitVector)
8074 return SDValue();
8076 SDValue MaskLo, MaskHi, Lo, Hi;
8077 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8079 SDValue PassThru = MGT->getPassThru();
8080 SDValue PassThruLo, PassThruHi;
8081 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8083 EVT LoVT, HiVT;
8084 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
8086 SDValue Chain = MGT->getChain();
8087 EVT MemoryVT = MGT->getMemoryVT();
8088 unsigned Alignment = MGT->getOriginalAlignment();
8090 EVT LoMemVT, HiMemVT;
8091 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8093 SDValue Scale = MGT->getScale();
8094 SDValue BasePtr = MGT->getBasePtr();
8095 SDValue Index = MGT->getIndex();
8096 SDValue IndexLo, IndexHi;
8097 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
8099 MachineMemOperand *MMO = DAG.getMachineFunction().
8100 getMachineMemOperand(MGT->getPointerInfo(),
8101 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
8102 Alignment, MGT->getAAInfo(), MGT->getRanges());
8104 SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
8105 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
8106 MMO);
8108 SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
8109 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
8110 MMO);
8112 AddToWorklist(Lo.getNode());
8113 AddToWorklist(Hi.getNode());
8115 // Build a factor node to remember that this load is independent of the
8116 // other one.
8117 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8118 Hi.getValue(1));
8120 // Legalized the chain result - switch anything that used the old chain to
8121 // use the new one.
8122 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
8124 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8126 SDValue RetOps[] = { GatherRes, Chain };
8127 return DAG.getMergeValues(RetOps, DL);
8130 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8131 if (Level >= AfterLegalizeTypes)
8132 return SDValue();
MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); // Cannot fail: N is MLOAD.
8135 SDValue Mask = MLD->getMask();
8136 SDLoc DL(N);
8138 // If the MLOAD result requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
8140 // prevents the type legalizer from unrolling SETCC into scalar comparisons
8141 // and enables future optimizations (e.g. min/max pattern matching on X86).
8142 if (Mask.getOpcode() == ISD::SETCC) {
8143 EVT VT = N->getValueType(0);
8145 // Check if any splitting is required.
8146 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8147 TargetLowering::TypeSplitVector)
8148 return SDValue();
8150 SDValue MaskLo, MaskHi, Lo, Hi;
8151 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8153 SDValue PassThru = MLD->getPassThru();
8154 SDValue PassThruLo, PassThruHi;
8155 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8157 EVT LoVT, HiVT;
8158 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
8160 SDValue Chain = MLD->getChain();
8161 SDValue Ptr = MLD->getBasePtr();
8162 EVT MemoryVT = MLD->getMemoryVT();
8163 unsigned Alignment = MLD->getOriginalAlignment();
// If the alignment is equal to the vector size in bytes, use half of it for
// the second half of the split load.
8167 unsigned SecondHalfAlignment =
8168 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
8169 Alignment/2 : Alignment;
8171 EVT LoMemVT, HiMemVT;
8172 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8174 MachineMemOperand *MMO = DAG.getMachineFunction().
8175 getMachineMemOperand(MLD->getPointerInfo(),
8176 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
8177 Alignment, MLD->getAAInfo(), MLD->getRanges());
8179 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
8180 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8182 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8183 MLD->isExpandingLoad());
8184 unsigned HiOffset = LoMemVT.getStoreSize();
8186 MMO = DAG.getMachineFunction().getMachineMemOperand(
8187 MLD->getPointerInfo().getWithOffset(HiOffset),
8188 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
8189 MLD->getAAInfo(), MLD->getRanges());
8191 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
8192 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8194 AddToWorklist(Lo.getNode());
8195 AddToWorklist(Hi.getNode());
8197 // Build a factor node to remember that this load is independent of the
8198 // other one.
8199 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8200 Hi.getValue(1));
8202 // Legalized the chain result - switch anything that used the old chain to
8203 // use the new one.
8204 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
8206 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8208 SDValue RetOps[] = { LoadRes, Chain };
8209 return DAG.getMergeValues(RetOps, DL);
8211 return SDValue();
8214 /// A vector select of 2 constant vectors can be simplified to math/logic to
8215 /// avoid a variable select instruction and possibly avoid constant loads.
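// Illustrative instance of the add-one case handled below (constants
// invented for exposition):
//   vselect <4 x i1> %c, <4 x i32> <5, 5, 5, 5>, <4 x i32> <4, 4, 4, 4>
//     --> add (zext <4 x i1> %c to <4 x i32>), <4, 4, 4, 4>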
8216 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8217 SDValue Cond = N->getOperand(0);
8218 SDValue N1 = N->getOperand(1);
8219 SDValue N2 = N->getOperand(2);
8220 EVT VT = N->getValueType(0);
8221 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8222 !TLI.convertSelectOfConstantsToMath(VT) ||
8223 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8224 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8225 return SDValue();
8227 // Check if we can use the condition value to increment/decrement a single
8228 // constant value. This simplifies a select to an add and removes a constant
8229 // load/materialization from the general case.
8230 bool AllAddOne = true;
8231 bool AllSubOne = true;
8232 unsigned Elts = VT.getVectorNumElements();
8233 for (unsigned i = 0; i != Elts; ++i) {
8234 SDValue N1Elt = N1.getOperand(i);
8235 SDValue N2Elt = N2.getOperand(i);
8236 if (N1Elt.isUndef() || N2Elt.isUndef())
8237 continue;
8239 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8240 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8241 if (C1 != C2 + 1)
8242 AllAddOne = false;
8243 if (C1 != C2 - 1)
8244 AllSubOne = false;
8247 // Further simplifications for the extra-special cases where the constants are
8248 // all 0 or all -1 should be implemented as folds of these patterns.
8249 SDLoc DL(N);
8250 if (AllAddOne || AllSubOne) {
8251 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8252 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8253 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8254 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8255 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8258 // The general case for select-of-constants:
8259 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8260 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8261 // leave that to a machine-specific pass.
8262 return SDValue();
8265 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8266 SDValue N0 = N->getOperand(0);
8267 SDValue N1 = N->getOperand(1);
8268 SDValue N2 = N->getOperand(2);
8269 EVT VT = N->getValueType(0);
8270 SDLoc DL(N);
8272 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8273 return V;
8275 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8276 if (SDValue F = extractBooleanFlip(N0, TLI))
8277 return DAG.getSelect(DL, VT, F, N2, N1);
8279 // Canonicalize integer abs.
8280 // vselect (setg[te] X, 0), X, -X ->
8281 // vselect (setgt X, -1), X, -X ->
8282 // vselect (setl[te] X, 0), -X, X ->
8283 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
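// Illustrative instance for <4 x i32> (the shift amount is size(X)-1 = 31):
//   vselect (setgt X, <0, 0, 0, 0>), X, (sub <0, 0, 0, 0>, X)
//     --> Y = sra X, 31; xor (add X, Y), Y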
8284 if (N0.getOpcode() == ISD::SETCC) {
8285 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8286 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8287 bool isAbs = false;
8288 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8290 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8291 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8292 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8293 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8294 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8295 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8296 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8298 if (isAbs) {
8299 EVT VT = LHS.getValueType();
8300 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8301 return DAG.getNode(ISD::ABS, DL, VT, LHS);
8303 SDValue Shift = DAG.getNode(
8304 ISD::SRA, DL, VT, LHS,
8305 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
8306 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8307 AddToWorklist(Shift.getNode());
8308 AddToWorklist(Add.getNode());
8309 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8312 // vselect x, y (fcmp lt x, y) -> fminnum x, y
8313 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8315 // This is OK if we don't care about what happens if either operand is a
8316 // NaN.
8318 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
8319 N0.getOperand(1), TLI)) {
8320 if (SDValue FMinMax = combineMinNumMaxNum(
8321 DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
8322 return FMinMax;
8325 // If this select has a condition (setcc) with narrower operands than the
8326 // select, try to widen the compare to match the select width.
8327 // TODO: This should be extended to handle any constant.
8328 // TODO: This could be extended to handle non-loading patterns, but that
8329 // requires thorough testing to avoid regressions.
8330 if (isNullOrNullSplat(RHS)) {
8331 EVT NarrowVT = LHS.getValueType();
8332 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8333 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8334 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8335 unsigned WideWidth = WideVT.getScalarSizeInBits();
8336 bool IsSigned = isSignedIntSetCC(CC);
8337 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8338 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8339 SetCCWidth != 1 && SetCCWidth < WideWidth &&
8340 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8341 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8342 // Both compare operands can be widened for free. The LHS can use an
8343 // extended load, and the RHS is a constant:
8344 // vselect (ext (setcc load(X), C)), N1, N2 -->
8345 // vselect (setcc extload(X), C'), N1, N2
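// Illustrative types (invented for exposition): a <4 x i16> load compared
// against a zero constant, feeding a select of <4 x i32> values, becomes a
// <4 x i32> extending load and a <4 x i32> setcc.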
8346 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8347 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8348 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8349 EVT WideSetCCVT = getSetCCResultType(WideVT);
8350 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8351 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8356 if (SimplifySelectOps(N, N1, N2))
8357 return SDValue(N, 0); // Don't revisit N.
8359 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8360 if (ISD::isBuildVectorAllOnes(N0.getNode()))
8361 return N1;
8362 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8363 if (ISD::isBuildVectorAllZeros(N0.getNode()))
8364 return N2;
// The ConvertSelectToConcatVector function assumes both of the above
// checks for (vselect (build_vector all{ones,zeros}) ...) have already been
// made and addressed.
8369 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8370 N2.getOpcode() == ISD::CONCAT_VECTORS &&
8371 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8372 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8373 return CV;
8376 if (SDValue V = foldVSelectOfConstants(N))
8377 return V;
8379 return SDValue();
8382 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8383 SDValue N0 = N->getOperand(0);
8384 SDValue N1 = N->getOperand(1);
8385 SDValue N2 = N->getOperand(2);
8386 SDValue N3 = N->getOperand(3);
8387 SDValue N4 = N->getOperand(4);
8388 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8390 // fold select_cc lhs, rhs, x, x, cc -> x
8391 if (N2 == N3)
8392 return N2;
8394 // Determine if the condition we're dealing with is constant
8395 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8396 CC, SDLoc(N), false)) {
8397 AddToWorklist(SCC.getNode());
8399 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8400 if (!SCCC->isNullValue())
8401 return N2; // cond always true -> true val
8402 else
8403 return N3; // cond always false -> false val
8404 } else if (SCC->isUndef()) {
// When the condition is UNDEF, just return the first operand. This is
// coherent with DAG creation: no setcc node is created in this case.
8407 return N2;
8408 } else if (SCC.getOpcode() == ISD::SETCC) {
8409 // Fold to a simpler select_cc
8410 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
8411 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
8412 SCC.getOperand(2));
8416 // If we can fold this based on the true/false value, do so.
8417 if (SimplifySelectOps(N, N2, N3))
8418 return SDValue(N, 0); // Don't revisit N.
8420 // fold select_cc into other things, such as min/max/abs
8421 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8424 SDValue DAGCombiner::visitSETCC(SDNode *N) {
// setcc is very commonly used as an argument to brcond. This pattern
// also lends itself to numerous combines and, as a result, it is desirable
// to keep the argument to brcond as a setcc for as long as possible.
8428 bool PreferSetCC =
8429 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8431 SDValue Combined = SimplifySetCC(
8432 N->getValueType(0), N->getOperand(0), N->getOperand(1),
8433 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8435 if (!Combined)
8436 return SDValue();
8438 // If we prefer to have a setcc, and we don't, we'll try our best to
8439 // recreate one using rebuildSetCC.
8440 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8441 SDValue NewSetCC = rebuildSetCC(Combined);
8443 // We don't have anything interesting to combine to.
8444 if (NewSetCC.getNode() == N)
8445 return SDValue();
8447 if (NewSetCC)
8448 return NewSetCC;
8451 return Combined;
8454 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8455 SDValue LHS = N->getOperand(0);
8456 SDValue RHS = N->getOperand(1);
8457 SDValue Carry = N->getOperand(2);
8458 SDValue Cond = N->getOperand(3);
8460 // If Carry is false, fold to a regular SETCC.
8461 if (isNullConstant(Carry))
8462 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8464 return SDValue();
8467 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8468 /// a build_vector of constants.
8469 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8470 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8471 /// Vector extends are not folded if operations are legal; this is to
8472 /// avoid introducing illegal build_vector dag nodes.
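// Illustrative example (types invented for exposition):
//   (zext (build_vector (i8 1), (i8 -1)) to v2i32)
//     --> (build_vector (i32 1), (i32 255))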
8473 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8474 SelectionDAG &DAG, bool LegalTypes) {
8475 unsigned Opcode = N->getOpcode();
8476 SDValue N0 = N->getOperand(0);
8477 EVT VT = N->getValueType(0);
8479 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8480 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8481 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8482 && "Expected EXTEND dag node in input!");
8484 // fold (sext c1) -> c1
8485 // fold (zext c1) -> c1
8486 // fold (aext c1) -> c1
8487 if (isa<ConstantSDNode>(N0))
8488 return DAG.getNode(Opcode, SDLoc(N), VT, N0);
8490 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8491 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8492 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8493 EVT SVT = VT.getScalarType();
8494 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8495 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8496 return SDValue();
8498 // We can fold this node into a build_vector.
8499 unsigned VTBits = SVT.getSizeInBits();
8500 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8501 SmallVector<SDValue, 8> Elts;
8502 unsigned NumElts = VT.getVectorNumElements();
8503 SDLoc DL(N);
// For zero-extensions, UNDEF elements are still guaranteed to have their
// upper bits set to zero.
8507 bool IsZext =
8508 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8510 for (unsigned i = 0; i != NumElts; ++i) {
8511 SDValue Op = N0.getOperand(i);
8512 if (Op.isUndef()) {
8513 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8514 continue;
8517 SDLoc DL(Op);
8518 // Get the constant value and if needed trunc it to the size of the type.
8519 // Nodes like build_vector might have constants wider than the scalar type.
8520 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8521 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8522 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8523 else
8524 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8527 return DAG.getBuildVector(VT, DL, Elts);
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
8534 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8535 unsigned ExtOpc,
8536 SmallVectorImpl<SDNode *> &ExtendNodes,
8537 const TargetLowering &TLI) {
8538 bool HasCopyToRegUses = false;
8539 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8540 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8541 UE = N0.getNode()->use_end();
8542 UI != UE; ++UI) {
8543 SDNode *User = *UI;
8544 if (User == N)
8545 continue;
8546 if (UI.getUse().getResNo() != N0.getResNo())
8547 continue;
8548 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8549 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8550 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8551 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8552 // Sign bits will be lost after a zext.
8553 return false;
8554 bool Add = false;
8555 for (unsigned i = 0; i != 2; ++i) {
8556 SDValue UseOp = User->getOperand(i);
8557 if (UseOp == N0)
8558 continue;
8559 if (!isa<ConstantSDNode>(UseOp))
8560 return false;
8561 Add = true;
8563 if (Add)
8564 ExtendNodes.push_back(User);
8565 continue;
8567 // If truncates aren't free and there are users we can't
8568 // extend, it isn't worthwhile.
8569 if (!isTruncFree)
8570 return false;
8571 // Remember if this value is live-out.
8572 if (User->getOpcode() == ISD::CopyToReg)
8573 HasCopyToRegUses = true;
8576 if (HasCopyToRegUses) {
8577 bool BothLiveOut = false;
8578 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8579 UI != UE; ++UI) {
8580 SDUse &Use = UI.getUse();
8581 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8582 BothLiveOut = true;
8583 break;
8586 if (BothLiveOut)
8587 // Both unextended and extended values are live out. There had better be
8588 // a good reason for the transformation.
return !ExtendNodes.empty();
8591 return true;
8594 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8595 SDValue OrigLoad, SDValue ExtLoad,
8596 ISD::NodeType ExtType) {
8597 // Extend SetCC uses if necessary.
8598 SDLoc DL(ExtLoad);
8599 for (SDNode *SetCC : SetCCs) {
8600 SmallVector<SDValue, 4> Ops;
8602 for (unsigned j = 0; j != 2; ++j) {
8603 SDValue SOp = SetCC->getOperand(j);
8604 if (SOp == OrigLoad)
8605 Ops.push_back(ExtLoad);
8606 else
8607 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8610 Ops.push_back(SetCC->getOperand(2));
8611 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8615 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8616 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8617 SDValue N0 = N->getOperand(0);
8618 EVT DstVT = N->getValueType(0);
8619 EVT SrcVT = N0.getValueType();
8621 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8622 N->getOpcode() == ISD::ZERO_EXTEND) &&
8623 "Unexpected node type (not an extend)!");
8625 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8626 // For example, on a target with legal v4i32, but illegal v8i32, turn:
8627 // (v8i32 (sext (v8i16 (load x))))
8628 // into:
8629 // (v8i32 (concat_vectors (v4i32 (sextload x)),
8630 // (v4i32 (sextload (x + 16)))))
8631 // Where uses of the original load, i.e.:
8632 // (v8i16 (load x))
8633 // are replaced with:
8634 // (v8i16 (truncate
8635 // (v8i32 (concat_vectors (v4i32 (sextload x)),
8636 // (v4i32 (sextload (x + 16)))))))
8638 // This combine is only applicable to illegal, but splittable, vectors.
8639 // All legal types, and illegal non-vector types, are handled elsewhere.
8640 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8642 if (N0->getOpcode() != ISD::LOAD)
8643 return SDValue();
8645 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8647 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
8648 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
8649 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
8650 return SDValue();
8652 SmallVector<SDNode *, 4> SetCCs;
8653 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
8654 return SDValue();
8656 ISD::LoadExtType ExtType =
8657 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8659 // Try to split the vector types to get down to legal types.
8660 EVT SplitSrcVT = SrcVT;
8661 EVT SplitDstVT = DstVT;
8662 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
8663 SplitSrcVT.getVectorNumElements() > 1) {
8664 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
8665 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
8668 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
8669 return SDValue();
8671 SDLoc DL(N);
8672 const unsigned NumSplits =
8673 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
8674 const unsigned Stride = SplitSrcVT.getStoreSize();
8675 SmallVector<SDValue, 4> Loads;
8676 SmallVector<SDValue, 4> Chains;
8678 SDValue BasePtr = LN0->getBasePtr();
8679 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
8680 const unsigned Offset = Idx * Stride;
8681 const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
8683 SDValue SplitLoad = DAG.getExtLoad(
8684 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
8685 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
8686 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8688 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8689 DAG.getConstant(Stride, DL, BasePtr.getValueType()));
8691 Loads.push_back(SplitLoad.getValue(0));
8692 Chains.push_back(SplitLoad.getValue(1));
8695 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
8696 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
8698 // Simplify TF.
8699 AddToWorklist(NewChain.getNode());
8701 CombineTo(N, NewValue);
8703 // Replace uses of the original load (before extension)
8704 // with a truncate of the concatenated sextloaded vectors.
8705 SDValue Trunc =
8706 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
8707 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
8708 CombineTo(N0.getNode(), Trunc, NewChain);
8709 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8712 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8713 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
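// Illustrative instance (i8 load zero-extended to i32; constants invented
// for exposition):
//   (zext (and (srl (load x), 1), 0x7F))
//     --> (and (srl (zextload x), 1), 0x7F)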
8714 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8715 assert(N->getOpcode() == ISD::ZERO_EXTEND);
8716 EVT VT = N->getValueType(0);
8717 EVT OrigVT = N->getOperand(0).getValueType();
8718 if (TLI.isZExtFree(OrigVT, VT))
8719 return SDValue();
8721 // and/or/xor
8722 SDValue N0 = N->getOperand(0);
8723 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8724 N0.getOpcode() == ISD::XOR) ||
8725 N0.getOperand(1).getOpcode() != ISD::Constant ||
8726 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
8727 return SDValue();
8729 // shl/shr
8730 SDValue N1 = N0->getOperand(0);
8731 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
8732 N1.getOperand(1).getOpcode() != ISD::Constant ||
8733 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
8734 return SDValue();
8736 // load
8737 if (!isa<LoadSDNode>(N1.getOperand(0)))
8738 return SDValue();
8739 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
8740 EVT MemVT = Load->getMemoryVT();
8741 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
8742 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8743 return SDValue();
8746 // If the shift op is SHL, the logic op must be AND, otherwise the result
8747 // will be wrong.
8748 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
8749 return SDValue();
8751 if (!N0.hasOneUse() || !N1.hasOneUse())
8752 return SDValue();
8754 SmallVector<SDNode*, 4> SetCCs;
8755 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
8756 ISD::ZERO_EXTEND, SetCCs, TLI))
8757 return SDValue();
8759 // Actually do the transformation.
8760 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8761 Load->getChain(), Load->getBasePtr(),
8762 Load->getMemoryVT(), Load->getMemOperand());
8764 SDLoc DL1(N1);
8765 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
8766 N1.getOperand(1));
8768 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8769 Mask = Mask.zext(VT.getSizeInBits());
8770 SDLoc DL0(N0);
8771 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
8772 DAG.getConstant(Mask, DL0, VT));
8774 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8775 CombineTo(N, And);
8776 if (SDValue(Load, 0).hasOneUse()) {
8777 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8778 } else {
8779 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8780 Load->getValueType(0), ExtLoad);
8781 CombineTo(Load, Trunc, ExtLoad.getValue(1));
8784 // N0 is dead at this point.
8785 recursivelyDeleteUnusedNodes(N0.getNode());
8787 return SDValue(N,0); // Return N so it doesn't get rechecked!
8790 /// If we're narrowing or widening the result of a vector select and the final
8791 /// size is the same size as a setcc (compare) feeding the select, then try to
8792 /// apply the cast operation to the select's operands because matching vector
8793 /// sizes for a select condition and other operands should be more efficient.
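// Illustrative example (types invented for exposition, on a target where the
// setcc result for <4 x i32> operands is itself <4 x i32>):
//   (trunc (vselect (setcc <4 x i32> %a, %b), <4 x i64> %x, <4 x i64> %y))
//     --> vselect (setcc %a, %b), (trunc %x), (trunc %y)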
8794 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8795 unsigned CastOpcode = Cast->getOpcode();
8796 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8797 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8798 CastOpcode == ISD::FP_ROUND) &&
8799 "Unexpected opcode for vector select narrowing/widening");
8801 // We only do this transform before legal ops because the pattern may be
8802 // obfuscated by target-specific operations after legalization. Do not create
8803 // an illegal select op, however, because that may be difficult to lower.
8804 EVT VT = Cast->getValueType(0);
8805 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8806 return SDValue();
8808 SDValue VSel = Cast->getOperand(0);
8809 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8810 VSel.getOperand(0).getOpcode() != ISD::SETCC)
8811 return SDValue();
8813 // Does the setcc have the same vector size as the casted select?
8814 SDValue SetCC = VSel.getOperand(0);
8815 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8816 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8817 return SDValue();
8819 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8820 SDValue A = VSel.getOperand(1);
8821 SDValue B = VSel.getOperand(2);
8822 SDValue CastA, CastB;
8823 SDLoc DL(Cast);
8824 if (CastOpcode == ISD::FP_ROUND) {
8825 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8826 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8827 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8828 } else {
8829 CastA = DAG.getNode(CastOpcode, DL, VT, A);
8830 CastB = DAG.getNode(CastOpcode, DL, VT, B);
8832 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8835 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8836 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8837 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8838 const TargetLowering &TLI, EVT VT,
8839 bool LegalOperations, SDNode *N,
8840 SDValue N0, ISD::LoadExtType ExtLoadType) {
8841 SDNode *N0Node = N0.getNode();
8842 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8843 : ISD::isZEXTLoad(N0Node);
8844 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8845 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8846 return SDValue();
8848 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8849 EVT MemVT = LN0->getMemoryVT();
8850 if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
8851 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8852 return SDValue();
8854 SDValue ExtLoad =
8855 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8856 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8857 Combiner.CombineTo(N, ExtLoad);
8858 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8859 if (LN0->use_empty())
8860 Combiner.recursivelyDeleteUnusedNodes(LN0);
8861 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8864 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8865 // Only generate vector extloads when 1) they're legal, and 2) they are
8866 // deemed desirable by the target.
8867 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8868 const TargetLowering &TLI, EVT VT,
8869 bool LegalOperations, SDNode *N, SDValue N0,
8870 ISD::LoadExtType ExtLoadType,
8871 ISD::NodeType ExtOpc) {
8872 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8873 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8874 ((LegalOperations || VT.isVector() ||
8875 cast<LoadSDNode>(N0)->isVolatile()) &&
8876 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8877 return {};
8879 bool DoXform = true;
8880 SmallVector<SDNode *, 4> SetCCs;
8881 if (!N0.hasOneUse())
8882 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8883 if (VT.isVector())
8884 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8885 if (!DoXform)
8886 return {};
8888 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8889 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8890 LN0->getBasePtr(), N0.getValueType(),
8891 LN0->getMemOperand());
8892 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8893 // If the load value is used only by N, replace it via CombineTo N.
8894 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8895 Combiner.CombineTo(N, ExtLoad);
8896 if (NoReplaceTrunc) {
8897 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8898 Combiner.recursivelyDeleteUnusedNodes(LN0);
8899 } else {
8900 SDValue Trunc =
8901 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8902 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8904 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8907 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8908 bool LegalOperations) {
8909 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8910 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8912 SDValue SetCC = N->getOperand(0);
8913 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8914 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8915 return SDValue();
8917 SDValue X = SetCC.getOperand(0);
8918 SDValue Ones = SetCC.getOperand(1);
8919 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8920 EVT VT = N->getValueType(0);
8921 EVT XVT = X.getValueType();
8922 // setge X, C is canonicalized to setgt, so we do not need to match that
8923 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8924 // not require the 'not' op.
8925 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8926 // Invert and smear/shift the sign bit:
8927 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8928 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
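// Concrete instance (illustrative, for i32):
//   sext i1 (setgt i32 X, -1) --> sra (xor X, -1), 31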
8929 SDLoc DL(N);
8930 SDValue NotX = DAG.getNOT(DL, X, VT);
8931 SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8932 auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8933 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8935 return SDValue();
8938 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8939 SDValue N0 = N->getOperand(0);
8940 EVT VT = N->getValueType(0);
8941 SDLoc DL(N);
8943 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8944 return Res;
8946 // fold (sext (sext x)) -> (sext x)
8947 // fold (sext (aext x)) -> (sext x)
8948 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8949 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8951 if (N0.getOpcode() == ISD::TRUNCATE) {
8952 // fold (sext (truncate (load x))) -> (sext (smaller load x))
8953 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8954 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8955 SDNode *oye = N0.getOperand(0).getNode();
8956 if (NarrowLoad.getNode() != N0.getNode()) {
8957 CombineTo(N0.getNode(), NarrowLoad);
8958 // CombineTo deleted the truncate, if needed, but not what's under it.
8959 AddToWorklist(oye);
8961 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8964 // See if the value being truncated is already sign extended. If so, just
8965 // eliminate the trunc/sext pair.
8966 SDValue Op = N0.getOperand(0);
8967 unsigned OpBits = Op.getScalarValueSizeInBits();
8968 unsigned MidBits = N0.getScalarValueSizeInBits();
8969 unsigned DestBits = VT.getScalarSizeInBits();
8970 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8972 if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already sign-extended and can be returned directly.
8975 if (NumSignBits > DestBits-MidBits)
8976 return Op;
8977 } else if (OpBits < DestBits) {
8978 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
8979 // bits, just sext from i32.
8980 if (NumSignBits > OpBits-MidBits)
8981 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8982 } else {
8983 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
8984 // bits, just truncate to i32.
8985 if (NumSignBits > OpBits-MidBits)
8986 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8989 // fold (sext (truncate x)) -> (sextinreg x).
8990 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8991 N0.getValueType())) {
8992 if (OpBits < DestBits)
8993 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8994 else if (OpBits > DestBits)
8995 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8996 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8997 DAG.getValueType(N0.getValueType()));
9001 // Try to simplify (sext (load x)).
9002 if (SDValue foldedExt =
9003 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9004 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
9005 return foldedExt;
9007 // fold (sext (load x)) to multiple smaller sextloads.
9008 // Only on illegal but splittable vectors.
9009 if (SDValue ExtLoad = CombineExtLoad(N))
9010 return ExtLoad;
9012 // Try to simplify (sext (sextload x)).
9013 if (SDValue foldedExt = tryToFoldExtOfExtload(
9014 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
9015 return foldedExt;
9017 // fold (sext (and/or/xor (load x), cst)) ->
9018 // (and/or/xor (sextload x), (sext cst))
9019 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9020 N0.getOpcode() == ISD::XOR) &&
9021 isa<LoadSDNode>(N0.getOperand(0)) &&
9022 N0.getOperand(1).getOpcode() == ISD::Constant &&
9023 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9024 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9025 EVT MemVT = LN00->getMemoryVT();
9026 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
9027 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
9028 SmallVector<SDNode*, 4> SetCCs;
9029 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9030 ISD::SIGN_EXTEND, SetCCs, TLI);
9031 if (DoXform) {
9032 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
9033 LN00->getChain(), LN00->getBasePtr(),
9034 LN00->getMemoryVT(),
9035 LN00->getMemOperand());
9036 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9037 Mask = Mask.sext(VT.getSizeInBits());
9038 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9039 ExtLoad, DAG.getConstant(Mask, DL, VT));
9040 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
9041 bool NoReplaceTruncAnd = !N0.hasOneUse();
9042 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9043 CombineTo(N, And);
9044 // If N0 has multiple uses, change other uses as well.
9045 if (NoReplaceTruncAnd) {
9046 SDValue TruncAnd =
9047 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9048 CombineTo(N0.getNode(), TruncAnd);
9050 if (NoReplaceTrunc) {
9051 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9052 } else {
9053 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9054 LN00->getValueType(0), ExtLoad);
9055 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9057 return SDValue(N,0); // Return N so it doesn't get rechecked!
9062 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9063 return V;
9065 if (N0.getOpcode() == ISD::SETCC) {
9066 SDValue N00 = N0.getOperand(0);
9067 SDValue N01 = N0.getOperand(1);
9068 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9069 EVT N00VT = N0.getOperand(0).getValueType();
9071 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9072 // Only do this before legalize for now.
9073 if (VT.isVector() && !LegalOperations &&
9074 TLI.getBooleanContents(N00VT) ==
9075 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9076 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9077 // of the same size as the compared operands. Only optimize sext(setcc())
9078 // if this is the case.
9079 EVT SVT = getSetCCResultType(N00VT);
9081 // If we already have the desired type, don't change it.
9082 if (SVT != N0.getValueType()) {
9083 // We know that the # elements of the results is the same as the
9084 // # elements of the compare (and the # elements of the compare result
9085 // for that matter). Check to see that they are the same size. If so,
9086 // we know that the element size of the sext'd result matches the
9087 // element size of the compare operands.
9088 if (VT.getSizeInBits() == SVT.getSizeInBits())
9089 return DAG.getSetCC(DL, VT, N00, N01, CC);
9091 // If the desired elements are smaller or larger than the source
9092 // elements, we can use a matching integer vector type and then
9093 // truncate/sign extend.
9094 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
9095 if (SVT == MatchingVecType) {
9096 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
9097 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
9102 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9103 // Here, T can be 1 or -1, depending on the type of the setcc and
9104 // getBooleanContents().
9105 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
9107 // To determine the "true" side of the select, we need to know the high bit
9108 // of the value returned by the setcc if it evaluates to true.
9109 // If the type of the setcc is i1, then the true case of the select is just
9110 // sext(i1 1), that is, -1.
9111 // If the type of the setcc is larger (say, i8) then the value of the high
9112 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9113 // of the appropriate width.
9114 SDValue ExtTrueVal = (SetCCWidth == 1)
9115 ? DAG.getAllOnesConstant(DL, VT)
9116 : DAG.getBoolConstant(true, DL, VT, N00VT);
9117 SDValue Zero = DAG.getConstant(0, DL, VT);
9118 if (SDValue SCC =
9119 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
9120 return SCC;
9122 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
9123 EVT SetCCVT = getSetCCResultType(N00VT);
9124 // Don't do this transform for i1 because there's a select transform
9125 // that would reverse it.
9126 // TODO: We should not do this transform at all without a target hook
9127 // because a sext is likely cheaper than a select?
9128 if (SetCCVT.getScalarSizeInBits() != 1 &&
9129 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
9130 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
9131 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
9136 // fold (sext x) -> (zext x) if the sign bit is known zero.
9137 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
9138 DAG.SignBitIsZero(N0))
9139 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
9141 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9142 return NewVSel;
9144 // Eliminate this sign extend by doing a negation in the destination type:
9145 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9146 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
9147 isNullOrNullSplat(N0.getOperand(0)) &&
9148 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
9149 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
9150 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
9151 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
9153 // Eliminate this sign extend by doing a decrement in the destination type:
9154 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9155 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
9156 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
9157 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9158 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
9159 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
9160 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
9163 return SDValue();
// isTruncateOf - If N is a truncate of some other value, return true,
// recording the value being truncated in Op and which of Op's bits are
// zero/one in Known. This function computes KnownBits to avoid a duplicated
// call to computeKnownBits in the caller.
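// For example, (setcc X, 0, setne) where X is known to be either 0 or 1
// behaves like a truncate of X to i1, which is why the SETNE form below is
// also accepted.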
9170 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9171 KnownBits &Known) {
9172 if (N->getOpcode() == ISD::TRUNCATE) {
9173 Op = N->getOperand(0);
9174 Known = DAG.computeKnownBits(Op);
9175 return true;
9178 if (N.getOpcode() != ISD::SETCC ||
9179 N.getValueType().getScalarType() != MVT::i1 ||
9180 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9181 return false;
9183 SDValue Op0 = N->getOperand(0);
9184 SDValue Op1 = N->getOperand(1);
9185 assert(Op0.getValueType() == Op1.getValueType());
9187 if (isNullOrNullSplat(Op0))
9188 Op = Op1;
9189 else if (isNullOrNullSplat(Op1))
9190 Op = Op0;
9191 else
9192 return false;
9194 Known = DAG.computeKnownBits(Op);
9196 return (Known.Zero | 1).isAllOnesValue();
9199 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9200 SDValue N0 = N->getOperand(0);
9201 EVT VT = N->getValueType(0);
9203 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9204 return Res;
9206 // fold (zext (zext x)) -> (zext x)
9207 // fold (zext (aext x)) -> (zext x)
9208 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9209 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9210 N0.getOperand(0));
9212 // fold (zext (truncate x)) -> (zext x) or
9213 // (zext (truncate x)) -> (truncate x)
9214 // This is valid when the truncated bits of x are already zero.
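// For instance, if the top 24 bits of an i32 value X are known zero, then
// (zext (trunc X to i8) to i64) can be rewritten as (zext X to i64)
// without changing any bits.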
9215 SDValue Op;
9216 KnownBits Known;
9217 if (isTruncateOf(DAG, N0, Op, Known)) {
9218 APInt TruncatedBits =
9219 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9220 APInt(Op.getScalarValueSizeInBits(), 0) :
9221 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9222 N0.getScalarValueSizeInBits(),
9223 std::min(Op.getScalarValueSizeInBits(),
9224 VT.getScalarSizeInBits()));
9225 if (TruncatedBits.isSubsetOf(Known.Zero))
9226 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9229 // fold (zext (truncate x)) -> (and x, mask)
9230 if (N0.getOpcode() == ISD::TRUNCATE) {
9231 // fold (zext (truncate (load x))) -> (zext (smaller load x))
9232 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9233 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9234 SDNode *oye = N0.getOperand(0).getNode();
9235 if (NarrowLoad.getNode() != N0.getNode()) {
9236 CombineTo(N0.getNode(), NarrowLoad);
9237 // CombineTo deleted the truncate, if needed, but not what's under it.
9238 AddToWorklist(oye);
9240 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9243 EVT SrcVT = N0.getOperand(0).getValueType();
9244 EVT MinVT = N0.getValueType();
9246 // Try to mask before the extension to avoid having to generate a larger mask,
9247 // possibly over several sub-vectors.
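// For example, for (zext (trunc v4i32 X to v4i8) to v4i64), masking first
// as (and X, splat(255)) in v4i32 and then zero-extending avoids
// materializing the mask in the wider v4i64 type.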
9248 if (SrcVT.bitsLT(VT) && VT.isVector()) {
9249 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9250 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9251 SDValue Op = N0.getOperand(0);
9252 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9253 AddToWorklist(Op.getNode());
9254 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9255 // Transfer the debug info; the new node is equivalent to N0.
9256 DAG.transferDbgValues(N0, ZExtOrTrunc);
9257 return ZExtOrTrunc;
9261 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9262 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9263 AddToWorklist(Op.getNode());
9264 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9265 // We may safely transfer the debug info describing the truncate node over
9266 // to the equivalent and operation.
9267 DAG.transferDbgValues(N0, And);
9268 return And;
9272 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9273 // if either of the casts is not free.
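// For example, on a target where the i64 -> i32 truncate is not free:
//   (zext (and (trunc i64 X to i32), 15) to i64) -> (and X, 15)
// The mask constant is zero-extended instead, so no truncate of X remains.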
9274 if (N0.getOpcode() == ISD::AND &&
9275 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9276 N0.getOperand(1).getOpcode() == ISD::Constant &&
9277 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9278 N0.getValueType()) ||
9279 !TLI.isZExtFree(N0.getValueType(), VT))) {
9280 SDValue X = N0.getOperand(0).getOperand(0);
9281 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
9282 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9283 Mask = Mask.zext(VT.getSizeInBits());
9284 SDLoc DL(N);
9285 return DAG.getNode(ISD::AND, DL, VT,
9286 X, DAG.getConstant(Mask, DL, VT));
9289 // Try to simplify (zext (load x)).
9290 if (SDValue foldedExt =
9291 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9292 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9293 return foldedExt;
9295 // fold (zext (load x)) to multiple smaller zextloads.
9296 // Only on illegal but splittable vectors.
9297 if (SDValue ExtLoad = CombineExtLoad(N))
9298 return ExtLoad;
9300 // fold (zext (and/or/xor (load x), cst)) ->
9301 // (and/or/xor (zextload x), (zext cst))
9302 // Unless (and (load x) cst) will match as a zextload already and has
9303 // additional users.
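// Illustrative example:
//   (zext (xor (i8 (load p)), 1) to i32) -> (xor (i32 (zextload p)), 1)
// which lets the load fold into a single zextload.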
9304 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9305 N0.getOpcode() == ISD::XOR) &&
9306 isa<LoadSDNode>(N0.getOperand(0)) &&
9307 N0.getOperand(1).getOpcode() == ISD::Constant &&
9308 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9309 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9310 EVT MemVT = LN00->getMemoryVT();
9311 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9312 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9313 bool DoXform = true;
9314 SmallVector<SDNode*, 4> SetCCs;
9315 if (!N0.hasOneUse()) {
9316 if (N0.getOpcode() == ISD::AND) {
9317 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9318 EVT LoadResultTy = AndC->getValueType(0);
9319 EVT ExtVT;
9320 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9321 DoXform = false;
9324 if (DoXform)
9325 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9326 ISD::ZERO_EXTEND, SetCCs, TLI);
9327 if (DoXform) {
9328 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9329 LN00->getChain(), LN00->getBasePtr(),
9330 LN00->getMemoryVT(),
9331 LN00->getMemOperand());
9332 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9333 Mask = Mask.zext(VT.getSizeInBits());
9334 SDLoc DL(N);
9335 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9336 ExtLoad, DAG.getConstant(Mask, DL, VT));
9337 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9338 bool NoReplaceTruncAnd = !N0.hasOneUse();
9339 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9340 CombineTo(N, And);
9341 // If N0 has multiple uses, change other uses as well.
9342 if (NoReplaceTruncAnd) {
9343 SDValue TruncAnd =
9344 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9345 CombineTo(N0.getNode(), TruncAnd);
9347 if (NoReplaceTrunc) {
9348 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9349 } else {
9350 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9351 LN00->getValueType(0), ExtLoad);
9352 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9354 return SDValue(N,0); // Return N so it doesn't get rechecked!
9359 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9360 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9361 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9362 return ZExtLoad;
9364 // Try to simplify (zext (zextload x)).
9365 if (SDValue foldedExt = tryToFoldExtOfExtload(
9366 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9367 return foldedExt;
9369 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9370 return V;
9372 if (N0.getOpcode() == ISD::SETCC) {
9373 // Only do this before legalize for now.
9374 if (!LegalOperations && VT.isVector() &&
9375 N0.getValueType().getVectorElementType() == MVT::i1) {
9376 EVT N00VT = N0.getOperand(0).getValueType();
9377 if (getSetCCResultType(N00VT) == N0.getValueType())
9378 return SDValue();
// We know that the # of elements of the result is the same as the #
// of elements of the compare (and the # of elements of the compare result
// for that matter). Check to see that they are the same size. If so, we
// know that the element size of the zext'd result matches the element
// size of the compare operands.
9385 SDLoc DL(N);
9386 SDValue VecOnes = DAG.getConstant(1, DL, VT);
9387 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
// zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
9389 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9390 N0.getOperand(1), N0.getOperand(2));
9391 return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9394 // If the desired elements are smaller or larger than the source
9395 // elements we can use a matching integer vector type and then
9396 // truncate/sign extend.
9397 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9398 SDValue VsetCC =
9399 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9400 N0.getOperand(1), N0.getOperand(2));
9401 return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9402 VecOnes);
9405 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9406 SDLoc DL(N);
9407 if (SDValue SCC = SimplifySelectCC(
9408 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9409 DAG.getConstant(0, DL, VT),
9410 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9411 return SCC;
9414 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9415 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9416 isa<ConstantSDNode>(N0.getOperand(1)) &&
9417 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9418 N0.hasOneUse()) {
9419 SDValue ShAmt = N0.getOperand(1);
9420 unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
9421 if (N0.getOpcode() == ISD::SHL) {
9422 SDValue InnerZExt = N0.getOperand(0);
9423 // If the original shl may be shifting out bits, do not perform this
9424 // transformation.
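// For example, in (zext (shl (zext i8 X to i16), 12) to i32) only
// 16 - 8 = 8 known-zero bits are available, so shifting by 12 could
// discard bits of X and the fold must be skipped.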
9425 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9426 InnerZExt.getOperand(0).getValueSizeInBits();
9427 if (ShAmtVal > KnownZeroBits)
9428 return SDValue();
9431 SDLoc DL(N);
9433 // Ensure that the shift amount is wide enough for the shifted value.
9434 if (VT.getSizeInBits() >= 256)
9435 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9437 return DAG.getNode(N0.getOpcode(), DL, VT,
9438 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9439 ShAmt);
9442 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9443 return NewVSel;
9445 return SDValue();
9448 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9449 SDValue N0 = N->getOperand(0);
9450 EVT VT = N->getValueType(0);
9452 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9453 return Res;
9455 // fold (aext (aext x)) -> (aext x)
9456 // fold (aext (zext x)) -> (zext x)
9457 // fold (aext (sext x)) -> (sext x)
9458 if (N0.getOpcode() == ISD::ANY_EXTEND ||
9459 N0.getOpcode() == ISD::ZERO_EXTEND ||
9460 N0.getOpcode() == ISD::SIGN_EXTEND)
9461 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9463 // fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
9465 if (N0.getOpcode() == ISD::TRUNCATE) {
9466 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9467 SDNode *oye = N0.getOperand(0).getNode();
9468 if (NarrowLoad.getNode() != N0.getNode()) {
9469 CombineTo(N0.getNode(), NarrowLoad);
9470 // CombineTo deleted the truncate, if needed, but not what's under it.
9471 AddToWorklist(oye);
9473 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9477 // fold (aext (truncate x))
9478 if (N0.getOpcode() == ISD::TRUNCATE)
9479 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9481 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9482 // if the trunc is not free.
9483 if (N0.getOpcode() == ISD::AND &&
9484 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9485 N0.getOperand(1).getOpcode() == ISD::Constant &&
9486 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9487 N0.getValueType())) {
9488 SDLoc DL(N);
9489 SDValue X = N0.getOperand(0).getOperand(0);
9490 X = DAG.getAnyExtOrTrunc(X, DL, VT);
9491 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9492 Mask = Mask.zext(VT.getSizeInBits());
9493 return DAG.getNode(ISD::AND, DL, VT,
9494 X, DAG.getConstant(Mask, DL, VT));
9497 // fold (aext (load x)) -> (aext (truncate (extload x)))
9498 // None of the supported targets knows how to perform load and any_ext
9499 // on vectors in one instruction. We only perform this transformation on
9500 // scalars.
9501 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9502 ISD::isUNINDEXEDLoad(N0.getNode()) &&
9503 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9504 bool DoXform = true;
9505 SmallVector<SDNode*, 4> SetCCs;
9506 if (!N0.hasOneUse())
9507 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9508 TLI);
9509 if (DoXform) {
9510 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9511 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9512 LN0->getChain(),
9513 LN0->getBasePtr(), N0.getValueType(),
9514 LN0->getMemOperand());
9515 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9516 // If the load value is used only by N, replace it via CombineTo N.
9517 bool NoReplaceTrunc = N0.hasOneUse();
9518 CombineTo(N, ExtLoad);
9519 if (NoReplaceTrunc) {
9520 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9521 recursivelyDeleteUnusedNodes(LN0);
9522 } else {
9523 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9524 N0.getValueType(), ExtLoad);
9525 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9527 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9531 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9532 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9533 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
9534 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9535 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9536 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9537 ISD::LoadExtType ExtType = LN0->getExtensionType();
9538 EVT MemVT = LN0->getMemoryVT();
9539 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9540 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9541 VT, LN0->getChain(), LN0->getBasePtr(),
9542 MemVT, LN0->getMemOperand());
9543 CombineTo(N, ExtLoad);
9544 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9545 recursivelyDeleteUnusedNodes(LN0);
9546 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9550 if (N0.getOpcode() == ISD::SETCC) {
9551 // For vectors:
9552 // aext(setcc) -> vsetcc
9553 // aext(setcc) -> truncate(vsetcc)
9554 // aext(setcc) -> aext(vsetcc)
9555 // Only do this before legalize for now.
9556 if (VT.isVector() && !LegalOperations) {
9557 EVT N00VT = N0.getOperand(0).getValueType();
9558 if (getSetCCResultType(N00VT) == N0.getValueType())
9559 return SDValue();
// We know that the # of elements of the result is the same as the
// # of elements of the compare (and the # of elements of the compare
// result, for that matter). Check to see that they are the same size.
// If so, we know that the element size of the extended result matches
// the element size of the compare operands.
9566 if (VT.getSizeInBits() == N00VT.getSizeInBits())
9567 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9568 N0.getOperand(1),
9569 cast<CondCodeSDNode>(N0.getOperand(2))->get());
9571 // If the desired elements are smaller or larger than the source
9572 // elements we can use a matching integer vector type and then
9573 // truncate/any extend
9574 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9575 SDValue VsetCC =
9576 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9577 N0.getOperand(1),
9578 cast<CondCodeSDNode>(N0.getOperand(2))->get());
9579 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9582 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9583 SDLoc DL(N);
9584 if (SDValue SCC = SimplifySelectCC(
9585 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9586 DAG.getConstant(0, DL, VT),
9587 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9588 return SCC;
9591 return SDValue();
9594 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9595 unsigned Opcode = N->getOpcode();
9596 SDValue N0 = N->getOperand(0);
9597 SDValue N1 = N->getOperand(1);
9598 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9600 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9601 if (N0.getOpcode() == Opcode &&
9602 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9603 return N0;
9605 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9606 N0.getOperand(0).getOpcode() == Opcode) {
9607 // We have an assert, truncate, assert sandwich. Make one stronger assert
9608 // by asserting on the smallest asserted type to the larger source type.
9609 // This eliminates the later assert:
9610 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9611 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9612 SDValue BigA = N0.getOperand(0);
9613 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9614 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9615 "Asserting zero/sign-extended bits to a type larger than the "
9616 "truncated destination does not provide information");
9618 SDLoc DL(N);
9619 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9620 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9621 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9622 BigA.getOperand(0), MinAssertVTVal);
9623 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
// If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
// than X, just move the AssertZext in front of the truncate and drop the
// AssertSext.
9629 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9630 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
9631 Opcode == ISD::AssertZext) {
9632 SDValue BigA = N0.getOperand(0);
9633 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9634 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9635 "Asserting zero/sign-extended bits to a type larger than the "
9636 "truncated destination does not provide information");
9638 if (AssertVT.bitsLT(BigA_AssertVT)) {
9639 SDLoc DL(N);
9640 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9641 BigA.getOperand(0), N1);
9642 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9646 return SDValue();
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// in the narrower type, transform it to a narrower load from address + N /
/// (num bits of new type). Also narrow the load if the result is masked with
/// an AND to effectively produce a smaller type. If the result is to be
/// extended, also fold the extension to form an extending load.
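/// For example, on a little-endian target:
///   (i32 (trunc (srl (i64 (load p)), 32))) -> (i32 (load p+4))
/// since the shift amount (32) is a multiple of the narrow width (32).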
9655 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9656 unsigned Opc = N->getOpcode();
9658 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9659 SDValue N0 = N->getOperand(0);
9660 EVT VT = N->getValueType(0);
9661 EVT ExtVT = VT;
9663 // This transformation isn't valid for vector loads.
9664 if (VT.isVector())
9665 return SDValue();
9667 unsigned ShAmt = 0;
9668 bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT and then
// sign-extending back to VT.
9671 if (Opc == ISD::SIGN_EXTEND_INREG) {
9672 ExtType = ISD::SEXTLOAD;
9673 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9674 } else if (Opc == ISD::SRL) {
// Another special case: SRL is basically zero-extending a narrower value,
// or it may be shifting a higher subword, half or byte into the lowest
// bits.
9678 ExtType = ISD::ZEXTLOAD;
9679 N0 = SDValue(N, 0);
9681 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9682 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9683 if (!N01 || !LN0)
9684 return SDValue();
9686 uint64_t ShiftAmt = N01->getZExtValue();
9687 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9688 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9689 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9690 else
9691 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9692 VT.getSizeInBits() - ShiftAmt);
9693 } else if (Opc == ISD::AND) {
9694 // An AND with a constant mask is the same as a truncate + zero-extend.
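// For example, (and (i32 (load p)), 0xffff) acts like
// (zext (trunc (load p) to i16) to i32), i.e. a zextload of the low
// 16 bits.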
9695 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9696 if (!AndC)
9697 return SDValue();
9699 const APInt &Mask = AndC->getAPIntValue();
9700 unsigned ActiveBits = 0;
9701 if (Mask.isMask()) {
9702 ActiveBits = Mask.countTrailingOnes();
9703 } else if (Mask.isShiftedMask()) {
9704 ShAmt = Mask.countTrailingZeros();
9705 APInt ShiftedMask = Mask.lshr(ShAmt);
9706 ActiveBits = ShiftedMask.countTrailingOnes();
9707 HasShiftedOffset = true;
9708 } else
9709 return SDValue();
9711 ExtType = ISD::ZEXTLOAD;
9712 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9715 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9716 SDValue SRL = N0;
9717 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9718 ShAmt = ConstShift->getZExtValue();
9719 unsigned EVTBits = ExtVT.getSizeInBits();
// Is the shift amount a multiple of the size of ExtVT?
9721 if ((ShAmt & (EVTBits-1)) == 0) {
9722 N0 = N0.getOperand(0);
// Is the load width a multiple of the size of ExtVT?
9724 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9725 return SDValue();
9728 // At this point, we must have a load or else we can't do the transform.
9729 if (!isa<LoadSDNode>(N0)) return SDValue();
9731 auto *LN0 = cast<LoadSDNode>(N0);
9733 // Because a SRL must be assumed to *need* to zero-extend the high bits
9734 // (as opposed to anyext the high bits), we can't combine the zextload
9735 // lowering of SRL and an sextload.
9736 if (LN0->getExtensionType() == ISD::SEXTLOAD)
9737 return SDValue();
9739 // If the shift amount is larger than the input type then we're not
9740 // accessing any of the loaded bytes. If the load was a zextload/extload
9741 // then the result of the shift+trunc is zero/undef (handled elsewhere).
9742 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9743 return SDValue();
9745 // If the SRL is only used by a masking AND, we may be able to adjust
9746 // the ExtVT to make the AND redundant.
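// For instance, if the SRL result only feeds (and X, 0xff), loading a
// single byte suffices; with the narrower ExtVT the AND becomes a no-op
// that later combines can remove.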
9747 SDNode *Mask = *(SRL->use_begin());
9748 if (Mask->getOpcode() == ISD::AND &&
9749 isa<ConstantSDNode>(Mask->getOperand(1))) {
9750 const APInt &ShiftMask =
9751 cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9752 if (ShiftMask.isMask()) {
9753 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9754 ShiftMask.countTrailingOnes());
9755 // If the mask is smaller, recompute the type.
9756 if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9757 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9758 ExtVT = MaskedVT;
9764 // If the load is shifted left (and the result isn't shifted back right),
9765 // we can fold the truncate through the shift.
9766 unsigned ShLeftAmt = 0;
9767 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9768 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9769 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9770 ShLeftAmt = N01->getZExtValue();
9771 N0 = N0.getOperand(0);
9775 // If we haven't found a load, we can't narrow it.
9776 if (!isa<LoadSDNode>(N0))
9777 return SDValue();
9779 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9780 if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9781 return SDValue();
9783 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
9784 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9785 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9786 return LVTStoreBits - EVTStoreBits - ShAmt;
9789 // For big endian targets, we need to adjust the offset to the pointer to
9790 // load the correct bytes.
9791 if (DAG.getDataLayout().isBigEndian())
9792 ShAmt = AdjustBigEndianShift(ShAmt);
9794 EVT PtrType = N0.getOperand(1).getValueType();
9795 uint64_t PtrOff = ShAmt / 8;
9796 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9797 SDLoc DL(LN0);
9798 // The original load itself didn't wrap, so an offset within it doesn't.
9799 SDNodeFlags Flags;
9800 Flags.setNoUnsignedWrap(true);
9801 SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9802 PtrType, LN0->getBasePtr(),
9803 DAG.getConstant(PtrOff, DL, PtrType),
9804 Flags);
9805 AddToWorklist(NewPtr.getNode());
9807 SDValue Load;
9808 if (ExtType == ISD::NON_EXTLOAD)
9809 Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9810 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9811 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9812 else
9813 Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9814 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9815 NewAlign, LN0->getMemOperand()->getFlags(),
9816 LN0->getAAInfo());
9818 // Replace the old load's chain with the new load's chain.
9819 WorklistRemover DeadNodes(*this);
9820 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9822 // Shift the result left, if we've swallowed a left shift.
9823 SDValue Result = Load;
9824 if (ShLeftAmt != 0) {
9825 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9826 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9827 ShImmTy = VT;
9828 // If the shift amount is as large as the result size (but, presumably,
9829 // no larger than the source) then the useful bits of the result are
9830 // zero; we can't simply return the shortened shift, because the result
9831 // of that operation is undefined.
9832 SDLoc DL(N0);
9833 if (ShLeftAmt >= VT.getSizeInBits())
9834 Result = DAG.getConstant(0, DL, VT);
9835 else
9836 Result = DAG.getNode(ISD::SHL, DL, VT,
9837 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9840 if (HasShiftedOffset) {
9841 // Recalculate the shift amount after it has been altered to calculate
9842 // the offset.
9843 if (DAG.getDataLayout().isBigEndian())
9844 ShAmt = AdjustBigEndianShift(ShAmt);
// We're using a shifted mask, so the load now has an offset. This means
// that data has been loaded into lower bytes than it would have been
// before, so we need to shl the loaded data into the correct position in
// the register.
9850 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
9851 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
9852 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
9855 // Return the new loaded value.
9856 return Result;
9859 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9860 SDValue N0 = N->getOperand(0);
9861 SDValue N1 = N->getOperand(1);
9862 EVT VT = N->getValueType(0);
9863 EVT EVT = cast<VTSDNode>(N1)->getVT();
9864 unsigned VTBits = VT.getScalarSizeInBits();
9865 unsigned EVTBits = EVT.getScalarSizeInBits();
9867 if (N0.isUndef())
9868 return DAG.getUNDEF(VT);
9870 // fold (sext_in_reg c1) -> c1
9871 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9872 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9874 // If the input is already sign extended, just drop the extension.
9875 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9876 return N0;
9878 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9879 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9880 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9881 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9882 N0.getOperand(0), N1);
9884 // fold (sext_in_reg (sext x)) -> (sext x)
9885 // fold (sext_in_reg (aext x)) -> (sext x)
9886 // if x is small enough or if we know that x has more than 1 sign bit and the
9887 // sign_extend_inreg is extending from one of them.
9888 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9889 SDValue N00 = N0.getOperand(0);
9890 unsigned N00Bits = N00.getScalarValueSizeInBits();
9891 if ((N00Bits <= EVTBits ||
9892 (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
9893 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9894 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9897 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9898 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9899 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9900 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9901 N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9902 if (!LegalOperations ||
9903 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9904 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
9905 N0.getOperand(0));
9908 // fold (sext_in_reg (zext x)) -> (sext x)
9909 // iff we are extending the source sign bit.
9910 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9911 SDValue N00 = N0.getOperand(0);
9912 if (N00.getScalarValueSizeInBits() == EVTBits &&
9913 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9917 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9918 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9919 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9921 // fold operands of sext_in_reg based on knowledge that the top bits are not
9922 // demanded.
9923 if (SimplifyDemandedBits(SDValue(N, 0)))
9924 return SDValue(N, 0);
9926 // fold (sext_in_reg (load x)) -> (smaller sextload x)
9927 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9928 if (SDValue NarrowLoad = ReduceLoadWidth(N))
9929 return NarrowLoad;
9931 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9932 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9933 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9934 if (N0.getOpcode() == ISD::SRL) {
9935 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9936 if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9937 // We can turn this into an SRA iff the input to the SRL is already sign
9938 // extended enough.
9939 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9940 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9941 return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9942 N0.getOperand(0), N0.getOperand(1));
9946 // fold (sext_inreg (extload x)) -> (sextload x)
// If sextload is not supported by the target, we can only do the combine
// when the load has one use. Doing otherwise can block folding the extload
// with other extends that the target does support.
9950 if (ISD::isEXTLoad(N0.getNode()) &&
9951 ISD::isUNINDEXEDLoad(N0.getNode()) &&
9952 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9953 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9954 N0.hasOneUse()) ||
9955 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9956 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9957 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9958 LN0->getChain(),
9959 LN0->getBasePtr(), EVT,
9960 LN0->getMemOperand());
9961 CombineTo(N, ExtLoad);
9962 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9963 AddToWorklist(ExtLoad.getNode());
9964 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9966 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9967 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9968 N0.hasOneUse() &&
9969 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9970 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9971 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9972 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9973 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9974 LN0->getChain(),
9975 LN0->getBasePtr(), EVT,
9976 LN0->getMemOperand());
9977 CombineTo(N, ExtLoad);
9978 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9979 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9982 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9983 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9984 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9985 N0.getOperand(1), false))
9986 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9987 BSwap, N1);
9990 return SDValue();
9993 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9994 SDValue N0 = N->getOperand(0);
9995 EVT VT = N->getValueType(0);
9997 if (N0.isUndef())
9998 return DAG.getUNDEF(VT);
10000 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10001 return Res;
10003 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10004 return SDValue(N, 0);
10006 return SDValue();
10009 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10010 SDValue N0 = N->getOperand(0);
10011 EVT VT = N->getValueType(0);
10013 if (N0.isUndef())
10014 return DAG.getUNDEF(VT);
10016 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10017 return Res;
10019 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10020 return SDValue(N, 0);
10022 return SDValue();
10025 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10026 SDValue N0 = N->getOperand(0);
10027 EVT VT = N->getValueType(0);
10028 EVT SrcVT = N0.getValueType();
10029 bool isLE = DAG.getDataLayout().isLittleEndian();
10031 // noop truncate
10032 if (SrcVT == VT)
10033 return N0;
10035 // fold (truncate (truncate x)) -> (truncate x)
10036 if (N0.getOpcode() == ISD::TRUNCATE)
10037 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10039 // fold (truncate c1) -> c1
10040 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
10041 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10042 if (C.getNode() != N)
10043 return C;
10046 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10047 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10048 N0.getOpcode() == ISD::SIGN_EXTEND ||
10049 N0.getOpcode() == ISD::ANY_EXTEND) {
10050 // if the source is smaller than the dest, we still need an extend.
10051 if (N0.getOperand(0).getValueType().bitsLT(VT))
10052 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
// if the source is larger than the dest, then we just need the truncate.
10054 if (N0.getOperand(0).getValueType().bitsGT(VT))
10055 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10056 // if the source and dest are the same type, we can drop both the extend
10057 // and the truncate.
10058 return N0.getOperand(0);
10061 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10062 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10063 return SDValue();
10065 // Fold extract-and-trunc into a narrow extract. For example:
10066 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10067 // i32 y = TRUNCATE(i64 x)
10068 // -- becomes --
10069 // v16i8 b = BITCAST (v2i64 val)
10070 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
// Note: We only run this optimization after type legalization (which often
// creates this pattern) and before operation legalization, after which
// we need to be more careful about the vector instructions that we generate.
10075 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10076 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10077 EVT VecTy = N0.getOperand(0).getValueType();
10078 EVT ExTy = N0.getValueType();
10079 EVT TrTy = N->getValueType(0);
10081 unsigned NumElem = VecTy.getVectorNumElements();
10082 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10084 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10085 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10087 SDValue EltNo = N0->getOperand(1);
10088 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10089 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10090 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
10091 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10093 SDLoc DL(N);
10094 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10095 DAG.getBitcast(NVT, N0.getOperand(0)),
10096 DAG.getConstant(Index, DL, IndexTy));
10100 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
10101 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10102 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10103 TLI.isTruncateFree(SrcVT, VT)) {
10104 SDLoc SL(N0);
10105 SDValue Cond = N0.getOperand(0);
10106 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10107 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10108 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10112 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
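// For example, (trunc (shl i64 X, 4) to i32) -> (shl (trunc X to i32), 4),
// which is safe because the known shift amount 4 is below the 32-bit
// scalar width.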
10113 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10114 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
10115 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10116 SDValue Amt = N0.getOperand(1);
10117 KnownBits Known = DAG.computeKnownBits(Amt);
10118 unsigned Size = VT.getScalarSizeInBits();
10119 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10120 SDLoc SL(N);
10121 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10123 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10124 if (AmtVT != Amt.getValueType()) {
10125 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10126 AddToWorklist(Amt.getNode());
10128 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10132 // Attempt to pre-truncate BUILD_VECTOR sources.
10133 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10134 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10135 SDLoc DL(N);
10136 EVT SVT = VT.getScalarType();
10137 SmallVector<SDValue, 8> TruncOps;
10138 for (const SDValue &Op : N0->op_values()) {
10139 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10140 TruncOps.push_back(TruncOp);
10142 return DAG.getBuildVector(VT, DL, TruncOps);
10145 // Fold a series of buildvector, bitcast, and truncate if possible.
10146 // For example fold
10147 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10148 // (2xi32 (buildvector x, y)).
10149 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10150 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10151 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
10152 N0.getOperand(0).hasOneUse()) {
10153 SDValue BuildVect = N0.getOperand(0);
10154 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10155 EVT TruncVecEltTy = VT.getVectorElementType();
10157 // Check that the element types match.
10158 if (BuildVectEltTy == TruncVecEltTy) {
10159 // Now we only need to compute the offset of the truncated elements.
10160 unsigned BuildVecNumElts = BuildVect.getNumOperands();
10161 unsigned TruncVecNumElts = VT.getVectorNumElements();
10162 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10164 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10165 "Invalid number of elements");
10167 SmallVector<SDValue, 8> Opnds;
10168 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10169 Opnds.push_back(BuildVect.getOperand(i));
10171 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10175 // See if we can simplify the input to this truncate through knowledge that
10176 // only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" -> trunc y.
10178 // Currently we only perform this optimization on scalars because vectors
10179 // may have different active low bits.
10180 if (!VT.isVector()) {
10181 APInt Mask =
10182 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
10183 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
10184 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
10187 // fold (truncate (load x)) -> (smaller load x)
10188 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
10189 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
10190 if (SDValue Reduced = ReduceLoadWidth(N))
10191 return Reduced;
10193 // Handle the case where the load remains an extending load even
10194 // after truncation.
10195 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
10196 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10197 if (!LN0->isVolatile() &&
10198 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
10199 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
10200 VT, LN0->getChain(), LN0->getBasePtr(),
10201 LN0->getMemoryVT(),
10202 LN0->getMemOperand());
10203 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
10204 return NewLoad;
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
// where ... are all 'undef'.
10211 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
10212 SmallVector<EVT, 8> VTs;
10213 SDValue V;
10214 unsigned Idx = 0;
10215 unsigned NumDefs = 0;
10217 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10218 SDValue X = N0.getOperand(i);
10219 if (!X.isUndef()) {
10220 V = X;
10221 Idx = i;
10222 NumDefs++;
// Stop if more than one member is non-undef.
10225 if (NumDefs > 1)
10226 break;
10227 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
10228 VT.getVectorElementType(),
10229 X.getValueType().getVectorNumElements()));
10232 if (NumDefs == 0)
10233 return DAG.getUNDEF(VT);
10235 if (NumDefs == 1) {
10236 assert(V.getNode() && "The single defined operand is empty!");
10237 SmallVector<SDValue, 8> Opnds;
10238 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10239 if (i != Idx) {
10240 Opnds.push_back(DAG.getUNDEF(VTs[i]));
10241 continue;
10243 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10244 AddToWorklist(NV.getNode());
10245 Opnds.push_back(NV);
10247 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10251 // Fold truncate of a bitcast of a vector to an extract of the low vector
10252 // element.
10254 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10255 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10256 SDValue VecSrc = N0.getOperand(0);
10257 EVT SrcVT = VecSrc.getValueType();
10258 if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10259 (!LegalOperations ||
10260 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
10261 SDLoc SL(N);
10263 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
10264 unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10265 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10266 VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10270 // Simplify the operands using demanded-bits information.
10271 if (!VT.isVector() &&
10272 SimplifyDemandedBits(SDValue(N, 0)))
10273 return SDValue(N, 0);
10275 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10276 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10277 // When the adde's carry is not used.
10278 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10279 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
// We only do this for ADDCARRY before operation legalization.
10281 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
10282 TLI.isOperationLegal(N0.getOpcode(), VT))) {
10283 SDLoc SL(N);
10284 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10285 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10286 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10287 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10290 // fold (truncate (extract_subvector(ext x))) ->
10291 // (extract_subvector x)
10292 // TODO: This can be generalized to cover cases where the truncate and extract
10293 // do not fully cancel each other out.
10294 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10295 SDValue N00 = N0.getOperand(0);
10296 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10297 N00.getOpcode() == ISD::ZERO_EXTEND ||
10298 N00.getOpcode() == ISD::ANY_EXTEND) {
10299 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10300 VT.getVectorElementType())
10301 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10302 N00.getOperand(0), N0.getOperand(1));
10306 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10307 return NewVSel;
10309 // Narrow a suitable binary operation with a non-opaque constant operand by
10310 // moving it ahead of the truncate. This is limited to pre-legalization
10311 // because targets may prefer a wider type during later combines and invert
10312 // this transform.
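// Illustrative example, pre-legalization:
//   (trunc (add i64 X, 7) to i32) -> (add (trunc X to i32), 7)
// The constant 7 is non-opaque, so it can be truncated ahead of the add.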
10313 switch (N0.getOpcode()) {
10314 case ISD::ADD:
10315 case ISD::SUB:
10316 case ISD::MUL:
10317 case ISD::AND:
10318 case ISD::OR:
10319 case ISD::XOR:
10320 if (!LegalOperations && N0.hasOneUse() &&
10321 (isConstantOrConstantVector(N0.getOperand(0), true) ||
10322 isConstantOrConstantVector(N0.getOperand(1), true))) {
10323 // TODO: We already restricted this to pre-legalization, but for vectors
10324 // we are extra cautious to not create an unsupported operation.
10325 // Target-specific changes are likely needed to avoid regressions here.
10326 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10327 SDLoc DL(N);
10328 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10329 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10330 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10335 return SDValue();
10338 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10339 SDValue Elt = N->getOperand(i);
10340 if (Elt.getOpcode() != ISD::MERGE_VALUES)
10341 return Elt.getNode();
10342 return Elt.getOperand(Elt.getResNo()).getNode();
10345 /// build_pair (load, load) -> load
10346 /// if load locations are consecutive.
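/// For example, on a little-endian target, if LD1 loads the 4 bytes at p
/// and LD2 loads the 4 bytes at p+4, then
///   (build_pair (i32 LD1), (i32 LD2)) -> (i64 (load p))
/// provided the alignment and legality checks pass.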
10347 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10348 assert(N->getOpcode() == ISD::BUILD_PAIR);
10350 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10351 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR always has the least significant part in elt 0 and the
// most significant part in elt 1. So when combining into one large load,
// we need to consider the endianness.
10356 if (DAG.getDataLayout().isBigEndian())
10357 std::swap(LD1, LD2);
10359 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10360 LD1->getAddressSpace() != LD2->getAddressSpace())
10361 return SDValue();
10362 EVT LD1VT = LD1->getValueType(0);
10363 unsigned LD1Bytes = LD1VT.getStoreSize();
10364 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10365 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10366 unsigned Align = LD1->getAlignment();
10367 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10368 VT.getTypeForEVT(*DAG.getContext()));
10370 if (NewAlign <= Align &&
10371 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10372 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10373 LD1->getPointerInfo(), Align);
10376 return SDValue();
10379 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10380 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10381 // and Lo parts; on big-endian machines it doesn't.
10382 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10385 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10386 const TargetLowering &TLI) {
10387 // If this is not a bitcast to an FP type or if the target doesn't have
10388 // IEEE754-compliant FP logic, we're done.
10389 EVT VT = N->getValueType(0);
10390 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10391 return SDValue();
10393 // TODO: Handle cases where the integer constant is a different scalar
10394 // bitwidth to the FP.
10395 SDValue N0 = N->getOperand(0);
10396 EVT SourceVT = N0.getValueType();
10397 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10398 return SDValue();
10400 unsigned FPOpcode;
10401 APInt SignMask;
10402 switch (N0.getOpcode()) {
10403 case ISD::AND:
10404 FPOpcode = ISD::FABS;
10405 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10406 break;
10407 case ISD::XOR:
10408 FPOpcode = ISD::FNEG;
10409 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10410 break;
10411 case ISD::OR:
10412 FPOpcode = ISD::FABS;
10413 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10414 break;
10415 default:
10416 return SDValue();
10419 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10420 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10421 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10422 // fneg (fabs X)
10423 SDValue LogicOp0 = N0.getOperand(0);
10424 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10425 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10426 LogicOp0.getOpcode() == ISD::BITCAST &&
10427 LogicOp0.getOperand(0).getValueType() == VT) {
10428 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10429 NumFPLogicOpsConv++;
10430 if (N0.getOpcode() == ISD::OR)
10431 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10432 return FPOp;
10435 return SDValue();
10438 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
10439 SDValue N0 = N->getOperand(0);
10440 EVT VT = N->getValueType(0);
10442 if (N0.isUndef())
10443 return DAG.getUNDEF(VT);
10445 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize types, unless both types are integer and the
// scalar type is legal. Only do this before legalize ops, since the target
// may be depending on the bitcast.
10449 // First check to see if this is all constant.
10450 // TODO: Support FP bitcasts after legalize types.
10451 if (VT.isVector() &&
10452 (!LegalTypes ||
10453 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
10454 TLI.isTypeLegal(VT.getVectorElementType()))) &&
10455 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
10456 cast<BuildVectorSDNode>(N0)->isConstant())
10457 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
10458 VT.getVectorElementType());
10460 // If the input is a constant, let getNode fold it.
10461 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
// If we can't allow illegal operations, we need to check that this is just
// an fp -> int or int -> fp conversion and that the resulting operation
// will be legal.
10465 if (!LegalOperations ||
10466 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
10467 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
10468 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
10469 TLI.isOperationLegal(ISD::Constant, VT))) {
10470 SDValue C = DAG.getBitcast(VT, N0);
10471 if (C.getNode() != N)
10472 return C;
10476 // (conv (conv x, t1), t2) -> (conv x, t2)
10477 if (N0.getOpcode() == ISD::BITCAST)
10478 return DAG.getBitcast(VT, N0.getOperand(0));
10480 // fold (conv (load x)) -> (load (conv*)x)
10481 // If the resultant load doesn't need a higher alignment than the original!
10482 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10483 // Do not remove the cast if the types differ in endian layout.
10484 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
10485 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
10486 // If the load is volatile, we only want to change the load type if the
10487 // resulting load is legal. Otherwise we might increase the number of
10488 // memory accesses. We don't care if the original type was legal or not
10489 // as we assume software couldn't rely on the number of accesses of an
10490 // illegal type.
10491 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10492 TLI.isOperationLegal(ISD::LOAD, VT)) &&
10493 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
10494 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10495 unsigned OrigAlign = LN0->getAlignment();
10497 bool Fast = false;
10498 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10499 LN0->getAddressSpace(), OrigAlign, &Fast) &&
10500 Fast) {
10501 SDValue Load =
10502 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
10503 LN0->getPointerInfo(), OrigAlign,
10504 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10505 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10506 return Load;
10510 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
10511 return V;
10513 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
10514 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
10516 // For ppc_fp128:
10517 // fold (bitcast (fneg x)) ->
10518 // flipbit = signbit
10519 // (xor (bitcast x) (build_pair flipbit, flipbit))
10521 // fold (bitcast (fabs x)) ->
10522 // flipbit = (and (extract_element (bitcast x), 0), signbit)
10523 // (xor (bitcast x) (build_pair flipbit, flipbit))
10524 // This often reduces constant pool loads.
10525 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
10526 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
10527 N0.getNode()->hasOneUse() && VT.isInteger() &&
10528 !VT.isVector() && !N0.getValueType().isVector()) {
10529 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
10530 AddToWorklist(NewConv.getNode());
10532 SDLoc DL(N);
10533 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10534 assert(VT.getSizeInBits() == 128);
10535 SDValue SignBit = DAG.getConstant(
10536 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
10537 SDValue FlipBit;
10538 if (N0.getOpcode() == ISD::FNEG) {
10539 FlipBit = SignBit;
10540 AddToWorklist(FlipBit.getNode());
10541 } else {
10542 assert(N0.getOpcode() == ISD::FABS);
10543 SDValue Hi =
10544 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
10545 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10546 SDLoc(NewConv)));
10547 AddToWorklist(Hi.getNode());
10548 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
10549 AddToWorklist(FlipBit.getNode());
10550 }
10551 SDValue FlipBits =
10552 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10553 AddToWorklist(FlipBits.getNode());
10554 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
10555 }
10556 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10557 if (N0.getOpcode() == ISD::FNEG)
10558 return DAG.getNode(ISD::XOR, DL, VT,
10559 NewConv, DAG.getConstant(SignBit, DL, VT));
10560 assert(N0.getOpcode() == ISD::FABS);
10561 return DAG.getNode(ISD::AND, DL, VT,
10562 NewConv, DAG.getConstant(~SignBit, DL, VT));
10563 }
10565 // fold (bitconvert (fcopysign cst, x)) ->
10566 // (or (and (bitconvert x), sign), (and cst, (not sign)))
10567 // Note that we don't handle (copysign x, cst) because this can always be
10568 // folded to an fneg or fabs.
10570 // For ppc_fp128:
10571 // fold (bitcast (fcopysign cst, x)) ->
10572 // flipbit = (and (extract_element
10573 // (xor (bitcast cst), (bitcast x)), 0),
10574 // signbit)
10575 // (xor (bitcast cst) (build_pair flipbit, flipbit))
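// e.g., for f32 this turns (bitcast (fcopysign 1.0, x)) into
// (or (and (bitcast x), 0x80000000), 0x3f800000).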
10576 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
10577 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
10578 VT.isInteger() && !VT.isVector()) {
10579 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
10580 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
10581 if (isTypeLegal(IntXVT)) {
10582 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
10583 AddToWorklist(X.getNode());
10585 // If X has a different width than the result/lhs, sext it or truncate it.
10586 unsigned VTWidth = VT.getSizeInBits();
10587 if (OrigXWidth < VTWidth) {
10588 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
10589 AddToWorklist(X.getNode());
10590 } else if (OrigXWidth > VTWidth) {
10591 // To get the sign bit in the right place, we have to shift it right
10592 // before truncating.
10593 SDLoc DL(X);
10594 X = DAG.getNode(ISD::SRL, DL,
10595 X.getValueType(), X,
10596 DAG.getConstant(OrigXWidth-VTWidth, DL,
10597 X.getValueType()));
10598 AddToWorklist(X.getNode());
10599 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
10600 AddToWorklist(X.getNode());
10601 }
10603 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10604 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
10605 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10606 AddToWorklist(Cst.getNode());
10607 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
10608 AddToWorklist(X.getNode());
10609 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
10610 AddToWorklist(XorResult.getNode());
10611 SDValue XorResult64 = DAG.getNode(
10612 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
10613 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10614 SDLoc(XorResult)));
10615 AddToWorklist(XorResult64.getNode());
10616 SDValue FlipBit =
10617 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
10618 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
10619 AddToWorklist(FlipBit.getNode());
10620 SDValue FlipBits =
10621 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10622 AddToWorklist(FlipBits.getNode());
10623 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
10624 }
10625 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10626 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
10627 X, DAG.getConstant(SignBit, SDLoc(X), VT));
10628 AddToWorklist(X.getNode());
10630 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10631 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
10632 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
10633 AddToWorklist(Cst.getNode());
10635 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
10636 }
10637 }
10639 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
10640 if (N0.getOpcode() == ISD::BUILD_PAIR)
10641 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
10642 return CombineLD;
10644 // Remove double bitcasts from shuffles - this is often a legacy of
10645 // XformToShuffleWithZero being used to combine bitmaskings (of
10646 // float vectors bitcast to integer vectors) into shuffles.
10647 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
10648 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
10649 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
10650 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
10651 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
10652 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
10654 // If an operand is a bitcast, peek through it when it casts from the original VT.
10655 // If an operand is a constant, just bitcast it back to the original VT.
10656 auto PeekThroughBitcast = [&](SDValue Op) {
10657 if (Op.getOpcode() == ISD::BITCAST &&
10658 Op.getOperand(0).getValueType() == VT)
10659 return SDValue(Op.getOperand(0));
10660 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
10661 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
10662 return DAG.getBitcast(VT, Op);
10663 return SDValue();
10664 };
10666 // FIXME: If either input vector is bitcast, try to convert the shuffle to
10667 // the result type of this bitcast. This would eliminate at least one
10668 // bitcast. See the transform in InstCombine.
10669 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
10670 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
10671 if (!(SV0 && SV1))
10672 return SDValue();
10674 int MaskScale =
10675 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
10676 SmallVector<int, 8> NewMask;
10677 for (int M : SVN->getMask())
10678 for (int i = 0; i != MaskScale; ++i)
10679 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
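// e.g., bitcasting a v2i64 shuffle with mask <1,0> to v4i32 scales the mask
// by MaskScale == 2, giving <2,3,0,1>.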
10681 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10682 if (!LegalMask) {
10683 std::swap(SV0, SV1);
10684 ShuffleVectorSDNode::commuteMask(NewMask);
10685 LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10686 }
10688 if (LegalMask)
10689 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
10690 }
10692 return SDValue();
10693 }
10695 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10696 EVT VT = N->getValueType(0);
10697 return CombineConsecutiveLoads(N, VT);
10698 }
10700 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10701 /// operands. DstEltVT indicates the destination element value type.
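/// e.g., a v2i64 constant build_vector bitcast to v4i32 is refolded by
/// splitting each 64-bit constant into two 32-bit constants.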
10702 SDValue DAGCombiner::
10703 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10704 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10706 // If this is already the right type, we're done.
10707 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10709 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10710 unsigned DstBitSize = DstEltVT.getSizeInBits();
10712 // If this is a conversion of N elements of one type to N elements of another
10713 // type, convert each element. This handles FP<->INT cases.
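// e.g., v2f32 -> v2i32 just bitcasts each of the two elements in place.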
10714 if (SrcBitSize == DstBitSize) {
10715 SmallVector<SDValue, 8> Ops;
10716 for (SDValue Op : BV->op_values()) {
10717 // If the vector element type is not legal, the BUILD_VECTOR operands
10718 // are promoted and implicitly truncated. Make that explicit here.
10719 if (Op.getValueType() != SrcEltVT)
10720 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10721 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10722 AddToWorklist(Ops.back().getNode());
10723 }
10724 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10725 BV->getValueType(0).getVectorNumElements());
10726 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10727 }
10729 // Otherwise, we're growing or shrinking the elements. To avoid having to
10730 // handle annoying details of growing/shrinking FP values, we convert them to
10731 // int first.
10732 if (SrcEltVT.isFloatingPoint()) {
10733 // Convert the input float vector to an int vector where the elements are
10734 // the same size.
10735 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10736 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10737 SrcEltVT = IntVT;
10738 }
10740 // Now we know the input is an integer vector. If the output is a FP type,
10741 // convert to integer first, then to FP of the right size.
10742 if (DstEltVT.isFloatingPoint()) {
10743 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10744 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10746 // Next, convert to FP elements of the same size.
10747 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10748 }
10750 SDLoc DL(BV);
10752 // Okay, we know the src/dst types are both integers of differing widths.
10753 // Handle growing first.
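// e.g., growing i32 elements <0x00000001, 0x00000002> into i64 produces
// 0x0000000200000001 on a little-endian target (element 0 in the low half).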
10754 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10755 if (SrcBitSize < DstBitSize) {
10756 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10758 SmallVector<SDValue, 8> Ops;
10759 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10760 i += NumInputsPerOutput) {
10761 bool isLE = DAG.getDataLayout().isLittleEndian();
10762 APInt NewBits = APInt(DstBitSize, 0);
10763 bool EltIsUndef = true;
10764 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10765 // Shift the previously computed bits over.
10766 NewBits <<= SrcBitSize;
10767 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10768 if (Op.isUndef()) continue;
10769 EltIsUndef = false;
10771 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10772 zextOrTrunc(SrcBitSize).zext(DstBitSize);
10773 }
10775 if (EltIsUndef)
10776 Ops.push_back(DAG.getUNDEF(DstEltVT));
10777 else
10778 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10779 }
10781 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10782 return DAG.getBuildVector(VT, DL, Ops);
10783 }
10785 // Finally, this must be the case where we are shrinking elements: each input
10786 // turns into multiple outputs.
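// e.g., shrinking the i64 constant 0x0000000200000001 into i32 pieces yields
// <0x00000001, 0x00000002> on little-endian; the pieces are reversed below
// for big-endian targets.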
10787 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10788 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10789 NumOutputsPerInput*BV->getNumOperands());
10790 SmallVector<SDValue, 8> Ops;
10792 for (const SDValue &Op : BV->op_values()) {
10793 if (Op.isUndef()) {
10794 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10795 continue;
10796 }
10798 APInt OpVal = cast<ConstantSDNode>(Op)->
10799 getAPIntValue().zextOrTrunc(SrcBitSize);
10801 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10802 APInt ThisVal = OpVal.trunc(DstBitSize);
10803 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10804 OpVal.lshrInPlace(DstBitSize);
10805 }
10807 // For big endian targets, swap the order of the pieces of each element.
10808 if (DAG.getDataLayout().isBigEndian())
10809 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10810 }
10812 return DAG.getBuildVector(VT, DL, Ops);
10813 }
10815 static bool isContractable(SDNode *N) {
10816 SDNodeFlags F = N->getFlags();
10817 return F.hasAllowContract() || F.hasAllowReassociation();
10818 }
10820 /// Try to perform FMA combining on a given FADD node.
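/// e.g., (fadd (fmul x, y), z) can fuse to (fma x, y, z) when contraction is
/// allowed.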
10821 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10822 SDValue N0 = N->getOperand(0);
10823 SDValue N1 = N->getOperand(1);
10824 EVT VT = N->getValueType(0);
10825 SDLoc SL(N);
10827 const TargetOptions &Options = DAG.getTarget().Options;
10829 // Floating-point multiply-add with intermediate rounding.
10830 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10832 // Floating-point multiply-add without intermediate rounding.
10833 bool HasFMA =
10834 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10835 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10837 // No valid opcode, do not combine.
10838 if (!HasFMAD && !HasFMA)
10839 return SDValue();
10841 SDNodeFlags Flags = N->getFlags();
10842 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10843 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10844 CanFuse || HasFMAD);
10845 // If the addition is not contractable, do not combine.
10846 if (!AllowFusionGlobally && !isContractable(N))
10847 return SDValue();
10849 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10850 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10851 return SDValue();
10853 // Always prefer FMAD to FMA for precision.
10854 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10855 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10857 // Is the node an FMUL and contractable either due to global flags or
10858 // SDNodeFlags.
10859 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10860 if (N.getOpcode() != ISD::FMUL)
10861 return false;
10862 return AllowFusionGlobally || isContractable(N.getNode());
10863 };
10864 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10865 // prefer to fold the multiply with fewer uses.
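// (The multiply with fewer other uses is more likely to become dead once it
// is fused into the FMA.)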
10866 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10867 if (N0.getNode()->use_size() > N1.getNode()->use_size())
10868 std::swap(N0, N1);
10869 }
10871 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10872 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10873 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10874 N0.getOperand(0), N0.getOperand(1), N1, Flags);
10875 }
10877 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10878 // Note: Commutes FADD operands.
10879 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10880 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10881 N1.getOperand(0), N1.getOperand(1), N0, Flags);
10882 }
10884 // Look through FP_EXTEND nodes to do more combining.
10886 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10887 if (N0.getOpcode() == ISD::FP_EXTEND) {
10888 SDValue N00 = N0.getOperand(0);
10889 if (isContractableFMUL(N00) &&
10890 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10891 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10892 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10893 N00.getOperand(0)),
10894 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10895 N00.getOperand(1)), N1, Flags);
10896 }
10897 }
10899 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10900 // Note: Commutes FADD operands.
10901 if (N1.getOpcode() == ISD::FP_EXTEND) {
10902 SDValue N10 = N1.getOperand(0);
10903 if (isContractableFMUL(N10) &&
10904 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10905 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10906 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10907 N10.getOperand(0)),
10908 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10909 N10.getOperand(1)), N0, Flags);
10910 }
10911 }
10913 // More folding opportunities when target permits.
10914 if (Aggressive) {
10915 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
10916 if (CanFuse &&
10917 N0.getOpcode() == PreferredFusedOpcode &&
10918 N0.getOperand(2).getOpcode() == ISD::FMUL &&
10919 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10920 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10921 N0.getOperand(0), N0.getOperand(1),
10922 DAG.getNode(PreferredFusedOpcode, SL, VT,
10923 N0.getOperand(2).getOperand(0),
10924 N0.getOperand(2).getOperand(1),
10925 N1, Flags), Flags);
10926 }
10928 // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
10929 if (CanFuse &&
10930 N1->getOpcode() == PreferredFusedOpcode &&
10931 N1.getOperand(2).getOpcode() == ISD::FMUL &&
10932 N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10933 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10934 N1.getOperand(0), N1.getOperand(1),
10935 DAG.getNode(PreferredFusedOpcode, SL, VT,
10936 N1.getOperand(2).getOperand(0),
10937 N1.getOperand(2).getOperand(1),
10938 N0, Flags), Flags);
10939 }
10942 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10943 // -> (fma x, y, (fma (fpext u), (fpext v), z))
10944 auto FoldFAddFMAFPExtFMul = [&] (
10945 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10946 SDNodeFlags Flags) {
10947 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10948 DAG.getNode(PreferredFusedOpcode, SL, VT,
10949 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10950 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10951 Z, Flags), Flags);
10952 };
10953 if (N0.getOpcode() == PreferredFusedOpcode) {
10954 SDValue N02 = N0.getOperand(2);
10955 if (N02.getOpcode() == ISD::FP_EXTEND) {
10956 SDValue N020 = N02.getOperand(0);
10957 if (isContractableFMUL(N020) &&
10958 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10959 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10960 N020.getOperand(0), N020.getOperand(1),
10961 N1, Flags);
10962 }
10963 }
10964 }
10966 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10967 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10968 // FIXME: This turns two single-precision and one double-precision
10969 // operation into two double-precision operations, which might not be
10970 // interesting for all targets, especially GPUs.
10971 auto FoldFAddFPExtFMAFMul = [&] (
10972 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10973 SDNodeFlags Flags) {
10974 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10975 DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10976 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10977 DAG.getNode(PreferredFusedOpcode, SL, VT,
10978 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10979 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10980 Z, Flags), Flags);
10981 };
10982 if (N0.getOpcode() == ISD::FP_EXTEND) {
10983 SDValue N00 = N0.getOperand(0);
10984 if (N00.getOpcode() == PreferredFusedOpcode) {
10985 SDValue N002 = N00.getOperand(2);
10986 if (isContractableFMUL(N002) &&
10987 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10988 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10989 N002.getOperand(0), N002.getOperand(1),
10990 N1, Flags);
10991 }
10992 }
10993 }
10995 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
10996 // -> (fma y, z, (fma (fpext u), (fpext v), x))
10997 if (N1.getOpcode() == PreferredFusedOpcode) {
10998 SDValue N12 = N1.getOperand(2);
10999 if (N12.getOpcode() == ISD::FP_EXTEND) {
11000 SDValue N120 = N12.getOperand(0);
11001 if (isContractableFMUL(N120) &&
11002 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11003 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11004 N120.getOperand(0), N120.getOperand(1),
11005 N0, Flags);
11006 }
11007 }
11008 }
11010 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
11011 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11012 // FIXME: This turns two single-precision and one double-precision
11013 // operation into two double-precision operations, which might not be
11014 // interesting for all targets, especially GPUs.
11015 if (N1.getOpcode() == ISD::FP_EXTEND) {
11016 SDValue N10 = N1.getOperand(0);
11017 if (N10.getOpcode() == PreferredFusedOpcode) {
11018 SDValue N102 = N10.getOperand(2);
11019 if (isContractableFMUL(N102) &&
11020 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11021 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11022 N102.getOperand(0), N102.getOperand(1),
11023 N0, Flags);
11024 }
11025 }
11026 }
11027 }
11029 return SDValue();
11030 }
11032 /// Try to perform FMA combining on a given FSUB node.
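/// e.g., (fsub (fmul x, y), z) can fuse to (fma x, y, (fneg z)) when
/// contraction is allowed.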
11033 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11034 SDValue N0 = N->getOperand(0);
11035 SDValue N1 = N->getOperand(1);
11036 EVT VT = N->getValueType(0);
11037 SDLoc SL(N);
11039 const TargetOptions &Options = DAG.getTarget().Options;
11040 // Floating-point multiply-add with intermediate rounding.
11041 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11043 // Floating-point multiply-add without intermediate rounding.
11044 bool HasFMA =
11045 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11046 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11048 // No valid opcode, do not combine.
11049 if (!HasFMAD && !HasFMA)
11050 return SDValue();
11052 const SDNodeFlags Flags = N->getFlags();
11053 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11054 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11055 CanFuse || HasFMAD);
11057 // If the subtraction is not contractable, do not combine.
11058 if (!AllowFusionGlobally && !isContractable(N))
11059 return SDValue();
11061 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11062 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11063 return SDValue();
11065 // Always prefer FMAD to FMA for precision.
11066 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11067 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11069 // Is the node an FMUL and contractable either due to global flags or
11070 // SDNodeFlags.
11071 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11072 if (N.getOpcode() != ISD::FMUL)
11073 return false;
11074 return AllowFusionGlobally || isContractable(N.getNode());
11075 };
11077 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11078 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11079 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11080 N0.getOperand(0), N0.getOperand(1),
11081 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11082 }
11084 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11085 // Note: Commutes FSUB operands.
11086 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11087 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11088 DAG.getNode(ISD::FNEG, SL, VT,
11089 N1.getOperand(0)),
11090 N1.getOperand(1), N0, Flags);
11091 }
11093 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
11094 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11095 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11096 SDValue N00 = N0.getOperand(0).getOperand(0);
11097 SDValue N01 = N0.getOperand(0).getOperand(1);
11098 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11099 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11100 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11101 }
11103 // Look through FP_EXTEND nodes to do more combining.
11105 // fold (fsub (fpext (fmul x, y)), z)
11106 // -> (fma (fpext x), (fpext y), (fneg z))
11107 if (N0.getOpcode() == ISD::FP_EXTEND) {
11108 SDValue N00 = N0.getOperand(0);
11109 if (isContractableFMUL(N00) &&
11110 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11111 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11112 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11113 N00.getOperand(0)),
11114 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11115 N00.getOperand(1)),
11116 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11117 }
11118 }
11120 // fold (fsub x, (fpext (fmul y, z)))
11121 // -> (fma (fneg (fpext y)), (fpext z), x)
11122 // Note: Commutes FSUB operands.
11123 if (N1.getOpcode() == ISD::FP_EXTEND) {
11124 SDValue N10 = N1.getOperand(0);
11125 if (isContractableFMUL(N10) &&
11126 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11127 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11128 DAG.getNode(ISD::FNEG, SL, VT,
11129 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11130 N10.getOperand(0))),
11131 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11132 N10.getOperand(1)),
11133 N0, Flags);
11134 }
11135 }
11137 // fold (fsub (fpext (fneg (fmul x, y))), z)
11138 // -> (fneg (fma (fpext x), (fpext y), z))
11139 // Note: This could be removed with appropriate canonicalization of the
11140 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
11141 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11142 // us from implementing the canonicalization in visitFSUB.
11143 if (N0.getOpcode() == ISD::FP_EXTEND) {
11144 SDValue N00 = N0.getOperand(0);
11145 if (N00.getOpcode() == ISD::FNEG) {
11146 SDValue N000 = N00.getOperand(0);
11147 if (isContractableFMUL(N000) &&
11148 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11149 return DAG.getNode(ISD::FNEG, SL, VT,
11150 DAG.getNode(PreferredFusedOpcode, SL, VT,
11151 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11152 N000.getOperand(0)),
11153 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11154 N000.getOperand(1)),
11155 N1, Flags));
11156 }
11157 }
11158 }
11160 // fold (fsub (fneg (fpext (fmul x, y))), z)
11161 // -> (fneg (fma (fpext x), (fpext y), z))
11162 // Note: This could be removed with appropriate canonicalization of the
11163 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
11164 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11165 // us from implementing the canonicalization in visitFSUB.
11166 if (N0.getOpcode() == ISD::FNEG) {
11167 SDValue N00 = N0.getOperand(0);
11168 if (N00.getOpcode() == ISD::FP_EXTEND) {
11169 SDValue N000 = N00.getOperand(0);
11170 if (isContractableFMUL(N000) &&
11171 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
11172 return DAG.getNode(ISD::FNEG, SL, VT,
11173 DAG.getNode(PreferredFusedOpcode, SL, VT,
11174 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11175 N000.getOperand(0)),
11176 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11177 N000.getOperand(1)),
11178 N1, Flags));
11179 }
11180 }
11181 }
11183 // More folding opportunities when target permits.
11184 if (Aggressive) {
11185 // fold (fsub (fma x, y, (fmul u, v)), z)
11186 // -> (fma x, y, (fma u, v, (fneg z)))
11187 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
11188 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
11189 N0.getOperand(2)->hasOneUse()) {
11190 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11191 N0.getOperand(0), N0.getOperand(1),
11192 DAG.getNode(PreferredFusedOpcode, SL, VT,
11193 N0.getOperand(2).getOperand(0),
11194 N0.getOperand(2).getOperand(1),
11195 DAG.getNode(ISD::FNEG, SL, VT,
11196 N1), Flags), Flags);
11197 }
11199 // fold (fsub x, (fma y, z, (fmul u, v)))
11200 // -> (fma (fneg y), z, (fma (fneg u), v, x))
11201 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
11202 isContractableFMUL(N1.getOperand(2))) {
11203 SDValue N20 = N1.getOperand(2).getOperand(0);
11204 SDValue N21 = N1.getOperand(2).getOperand(1);
11205 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11206 DAG.getNode(ISD::FNEG, SL, VT,
11207 N1.getOperand(0)),
11208 N1.getOperand(1),
11209 DAG.getNode(PreferredFusedOpcode, SL, VT,
11210 DAG.getNode(ISD::FNEG, SL, VT, N20),
11211 N21, N0, Flags), Flags);
11212 }
11215 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
11216 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
11217 if (N0.getOpcode() == PreferredFusedOpcode) {
11218 SDValue N02 = N0.getOperand(2);
11219 if (N02.getOpcode() == ISD::FP_EXTEND) {
11220 SDValue N020 = N02.getOperand(0);
11221 if (isContractableFMUL(N020) &&
11222 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11223 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11224 N0.getOperand(0), N0.getOperand(1),
11225 DAG.getNode(PreferredFusedOpcode, SL, VT,
11226 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11227 N020.getOperand(0)),
11228 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11229 N020.getOperand(1)),
11230 DAG.getNode(ISD::FNEG, SL, VT,
11231 N1), Flags), Flags);
11232 }
11233 }
11234 }
11236 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11237 // -> (fma (fpext x), (fpext y),
11238 // (fma (fpext u), (fpext v), (fneg z)))
11239 // FIXME: This turns two single-precision and one double-precision
11240 // operation into two double-precision operations, which might not be
11241 // interesting for all targets, especially GPUs.
11242 if (N0.getOpcode() == ISD::FP_EXTEND) {
11243 SDValue N00 = N0.getOperand(0);
11244 if (N00.getOpcode() == PreferredFusedOpcode) {
11245 SDValue N002 = N00.getOperand(2);
11246 if (isContractableFMUL(N002) &&
11247 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11248 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11249 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11250 N00.getOperand(0)),
11251 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11252 N00.getOperand(1)),
11253 DAG.getNode(PreferredFusedOpcode, SL, VT,
11254 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11255 N002.getOperand(0)),
11256 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11257 N002.getOperand(1)),
11258 DAG.getNode(ISD::FNEG, SL, VT,
11259 N1), Flags), Flags);
11260 }
11261 }
11262 }
11264 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11265 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11266 if (N1.getOpcode() == PreferredFusedOpcode &&
11267 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11268 SDValue N120 = N1.getOperand(2).getOperand(0);
11269 if (isContractableFMUL(N120) &&
11270 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11271 SDValue N1200 = N120.getOperand(0);
11272 SDValue N1201 = N120.getOperand(1);
11273 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11274 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11275 N1.getOperand(1),
11276 DAG.getNode(PreferredFusedOpcode, SL, VT,
11277 DAG.getNode(ISD::FNEG, SL, VT,
11278 DAG.getNode(ISD::FP_EXTEND, SL,
11279 VT, N1200)),
11280 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11281 N1201),
11282 N0, Flags), Flags);
11283 }
11284 }
11286 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11287 // -> (fma (fneg (fpext y)), (fpext z),
11288 // (fma (fneg (fpext u)), (fpext v), x))
11289 // FIXME: This turns two single-precision and one double-precision
11290 // operation into two double-precision operations, which might not be
11291 // interesting for all targets, especially GPUs.
11292 if (N1.getOpcode() == ISD::FP_EXTEND &&
11293 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11294 SDValue CvtSrc = N1.getOperand(0);
11295 SDValue N100 = CvtSrc.getOperand(0);
11296 SDValue N101 = CvtSrc.getOperand(1);
11297 SDValue N102 = CvtSrc.getOperand(2);
11298 if (isContractableFMUL(N102) &&
11299 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11300 SDValue N1020 = N102.getOperand(0);
11301 SDValue N1021 = N102.getOperand(1);
11302 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11303 DAG.getNode(ISD::FNEG, SL, VT,
11304 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11305 N100)),
11306 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11307 DAG.getNode(PreferredFusedOpcode, SL, VT,
11308 DAG.getNode(ISD::FNEG, SL, VT,
11309 DAG.getNode(ISD::FP_EXTEND, SL,
11310 VT, N1020)),
11311 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11312 N1021),
11313 N0, Flags), Flags);
11314 }
11315 }
11316 }
11318 return SDValue();
11319 }
11321 /// Try to perform FMA combining on a given FMUL node based on the distributive
11322 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11323 /// subtraction instead of addition).
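/// e.g., (fmul (fadd x, +1.0), y) becomes (fma x, y, y).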
11324 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11325 SDValue N0 = N->getOperand(0);
11326 SDValue N1 = N->getOperand(1);
11327 EVT VT = N->getValueType(0);
11328 SDLoc SL(N);
11329 const SDNodeFlags Flags = N->getFlags();
11331 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11333 const TargetOptions &Options = DAG.getTarget().Options;
11335 // The transforms below are incorrect when x == 0 and y == inf, because the
11336 // intermediate multiplication produces a nan.
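// e.g., (fmul (fadd x, +1.0), y) with x == 0.0 and y == inf evaluates to inf,
// but the fused (fma x, y, y) first computes 0.0 * inf == NaN.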
11337 if (!Options.NoInfsFPMath)
11338 return SDValue();
11340 // Floating-point multiply-add without intermediate rounding.
11341 bool HasFMA =
11342 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11343 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11344 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11346 // Floating-point multiply-add with intermediate rounding. This can result
11347 // in a less precise result due to the changed rounding order.
11348 bool HasFMAD = Options.UnsafeFPMath &&
11349 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11351 // No valid opcode, do not combine.
11352 if (!HasFMAD && !HasFMA)
11353 return SDValue();
11355 // Always prefer FMAD to FMA for precision.
11356 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11357 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11359 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11360 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11361 auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11362 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11363 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11364 if (C->isExactlyValue(+1.0))
11365 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11366 Y, Flags);
11367 if (C->isExactlyValue(-1.0))
11368 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11369 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11370 }
11371 }
11372 return SDValue();
11373 };
11375 if (SDValue FMA = FuseFADD(N0, N1, Flags))
11376 return FMA;
11377 if (SDValue FMA = FuseFADD(N1, N0, Flags))
11378 return FMA;
11380 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11381 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11382 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11383 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11384 auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11385 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11386 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11387 if (C0->isExactlyValue(+1.0))
11388 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11389 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11390 Y, Flags);
11391 if (C0->isExactlyValue(-1.0))
11392 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11393 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11394 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11395 }
11396 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11397 if (C1->isExactlyValue(+1.0))
11398 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11399 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11400 if (C1->isExactlyValue(-1.0))
11401 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11402 Y, Flags);
11403 }
11404 }
11405 return SDValue();
11406 };
11408 if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11409 return FMA;
11410 if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11411 return FMA;
11413 return SDValue();
11414 }
11416 SDValue DAGCombiner::visitFADD(SDNode *N) {
11417 SDValue N0 = N->getOperand(0);
11418 SDValue N1 = N->getOperand(1);
11419 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11420 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11421 EVT VT = N->getValueType(0);
11422 SDLoc DL(N);
11423 const TargetOptions &Options = DAG.getTarget().Options;
11424 const SDNodeFlags Flags = N->getFlags();
11426 // fold vector ops
11427 if (VT.isVector())
11428 if (SDValue FoldedVOp = SimplifyVBinOp(N))
11429 return FoldedVOp;
11431 // fold (fadd c1, c2) -> c1 + c2
11432 if (N0CFP && N1CFP)
11433 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11435 // canonicalize constant to RHS
11436 if (N0CFP && !N1CFP)
11437 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11439 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11440 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11441 if (N1C && N1C->isZero())
11442 if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
11443 return N0;
11445 if (SDValue NewSel = foldBinOpIntoSelect(N))
11446 return NewSel;
11448 // fold (fadd A, (fneg B)) -> (fsub A, B)
11449 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11450 isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
11451 return DAG.getNode(ISD::FSUB, DL, VT, N0,
11452 GetNegatedExpression(N1, DAG, LegalOperations,
11453 ForCodeSize), Flags);
11455 // fold (fadd (fneg A), B) -> (fsub B, A)
11456 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11457 isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
11458 return DAG.getNode(ISD::FSUB, DL, VT, N1,
11459 GetNegatedExpression(N0, DAG, LegalOperations,
11460 ForCodeSize), Flags);
11462 auto isFMulNegTwo = [](SDValue FMul) {
11463 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11464 return false;
11465 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11466 return C && C->isExactlyValue(-2.0);
11467 };
11469 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11470 if (isFMulNegTwo(N0)) {
11471 SDValue B = N0.getOperand(0);
11472 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11473 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11474 }
11475 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11476 if (isFMulNegTwo(N1)) {
11477 SDValue B = N1.getOperand(0);
11478 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11479 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11480 }
11482 // No FP constant should be created after legalization as the Instruction
11483 // Selection pass has a hard time dealing with FP constants.
11484 bool AllowNewConst = (Level < AfterLegalizeDAG);
11486 // If 'unsafe math' or nnan is enabled, fold lots of things.
11487 if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11488 // If allowed, fold (fadd (fneg x), x) -> 0.0
11489 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11490 return DAG.getConstantFP(0.0, DL, VT);
11492 // If allowed, fold (fadd x, (fneg x)) -> 0.0
11493 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11494 return DAG.getConstantFP(0.0, DL, VT);
11495 }
11497 // If 'unsafe math' or reassoc and nsz, fold lots of things.
11498 // TODO: break out portions of the transformations below for which Unsafe is
11499 // considered and which do not require both nsz and reassoc
11500 if ((Options.UnsafeFPMath ||
11501 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11502 AllowNewConst) {
11503 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11504 if (N1CFP && N0.getOpcode() == ISD::FADD &&
11505 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11506 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11507 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11508 }
11510 // We can fold chains of FADD's of the same value into multiplications.
11511 // This transform is not safe in general because we are reducing the number
11512 // of rounding steps.
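// e.g., (fadd (fadd x, x), x) -> (fmul x, 3.0) rounds once where the original
// expression rounds twice.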
11513 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11514 if (N0.getOpcode() == ISD::FMUL) {
11515 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11516 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11518 // (fadd (fmul x, c), x) -> (fmul x, c+1)
11519 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11520 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11521 DAG.getConstantFP(1.0, DL, VT), Flags);
11522 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11523 }
11525 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11526 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11527 N1.getOperand(0) == N1.getOperand(1) &&
11528 N0.getOperand(0) == N1.getOperand(0)) {
11529 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11530 DAG.getConstantFP(2.0, DL, VT), Flags);
11531 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
11532 }
11533 }
11535 if (N1.getOpcode() == ISD::FMUL) {
11536 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11537 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
11539 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
11540 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
11541 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11542 DAG.getConstantFP(1.0, DL, VT), Flags);
11543 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
11544 }
11546 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
11547 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
11548 N0.getOperand(0) == N0.getOperand(1) &&
11549 N1.getOperand(0) == N0.getOperand(0)) {
11550 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11551 DAG.getConstantFP(2.0, DL, VT), Flags);
11552 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
11553 }
11554 }
11556 if (N0.getOpcode() == ISD::FADD) {
11557 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11558 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
11559 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
11560 (N0.getOperand(0) == N1)) {
11561 return DAG.getNode(ISD::FMUL, DL, VT,
11562 N1, DAG.getConstantFP(3.0, DL, VT), Flags);
11563 }
11564 }
11566 if (N1.getOpcode() == ISD::FADD) {
11567 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11568 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11569 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11570 N1.getOperand(0) == N0) {
11571 return DAG.getNode(ISD::FMUL, DL, VT,
11572 N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11573 }
11574 }
11576 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11577 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11578 N0.getOperand(0) == N0.getOperand(1) &&
11579 N1.getOperand(0) == N1.getOperand(1) &&
11580 N0.getOperand(0) == N1.getOperand(0)) {
11581 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11582 DAG.getConstantFP(4.0, DL, VT), Flags);
11583 }
11584 }
11585 } // enable-unsafe-fp-math
11587 // FADD -> FMA combines:
11588 if (SDValue Fused = visitFADDForFMACombine(N)) {
11589 AddToWorklist(Fused.getNode());
11590 return Fused;
11591 }
11592 return SDValue();
11593 }
11595 SDValue DAGCombiner::visitFSUB(SDNode *N) {
11596 SDValue N0 = N->getOperand(0);
11597 SDValue N1 = N->getOperand(1);
11598 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11599 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11600 EVT VT = N->getValueType(0);
11601 SDLoc DL(N);
11602 const TargetOptions &Options = DAG.getTarget().Options;
11603 const SDNodeFlags Flags = N->getFlags();
11605 // fold vector ops
11606 if (VT.isVector())
11607 if (SDValue FoldedVOp = SimplifyVBinOp(N))
11608 return FoldedVOp;
11610 // fold (fsub c1, c2) -> c1-c2
11611 if (N0CFP && N1CFP)
11612 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11614 if (SDValue NewSel = foldBinOpIntoSelect(N))
11615 return NewSel;
11617 // (fsub A, 0) -> A
11618 if (N1CFP && N1CFP->isZero()) {
11619 if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
11620 Flags.hasNoSignedZeros()) {
11621 return N0;
11622 }
11623 }
11625 if (N0 == N1) {
11626 // (fsub x, x) -> 0.0
11627 if (Options.UnsafeFPMath || Flags.hasNoNaNs())
11628 return DAG.getConstantFP(0.0f, DL, VT);
11629 }
11631 // (fsub -0.0, N1) -> -N1
11632 // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
11633 // FSUB does not specify the sign bit of a NaN. Also note that for
11634 // the same reason, the inverse transform is not safe, unless fast math
11635 // flags are in play.
11636 if (N0CFP && N0CFP->isZero()) {
11637 if (N0CFP->isNegative() ||
11638 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
11639 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
11640 return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
11641 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11642 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
11643 }
11644 }
11646 if ((Options.UnsafeFPMath ||
11647 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
11648 && N1.getOpcode() == ISD::FADD) {
11649 // X - (X + Y) -> -Y
11650 if (N0 == N1->getOperand(0))
11651 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
11652 // X - (Y + X) -> -Y
11653 if (N0 == N1->getOperand(1))
11654 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
11655 }
11657 // fold (fsub A, (fneg B)) -> (fadd A, B)
11658 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
11659 return DAG.getNode(ISD::FADD, DL, VT, N0,
11660 GetNegatedExpression(N1, DAG, LegalOperations,
11661 ForCodeSize), Flags);
11663 // FSUB -> FMA combines:
11664 if (SDValue Fused = visitFSUBForFMACombine(N)) {
11665 AddToWorklist(Fused.getNode());
11666 return Fused;
11667 }
11669 return SDValue();
11670 }
11672 SDValue DAGCombiner::visitFMUL(SDNode *N) {
11673 SDValue N0 = N->getOperand(0);
11674 SDValue N1 = N->getOperand(1);
11675 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11676 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11677 EVT VT = N->getValueType(0);
11678 SDLoc DL(N);
11679 const TargetOptions &Options = DAG.getTarget().Options;
11680 const SDNodeFlags Flags = N->getFlags();
11682 // fold vector ops
11683 if (VT.isVector()) {
11684 // This just handles C1 * C2 for vectors. Other vector folds are below.
11685 if (SDValue FoldedVOp = SimplifyVBinOp(N))
11686 return FoldedVOp;
11687 }
11689 // fold (fmul c1, c2) -> c1*c2
11690 if (N0CFP && N1CFP)
11691 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
11693 // canonicalize constant to RHS
11694 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11695 !isConstantFPBuildVectorOrConstantFP(N1))
11696 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
11698 // fold (fmul A, 1.0) -> A
11699 if (N1CFP && N1CFP->isExactlyValue(1.0))
11700 return N0;
11702 if (SDValue NewSel = foldBinOpIntoSelect(N))
11703 return NewSel;
11705 if (Options.UnsafeFPMath ||
11706 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11707 // fold (fmul A, 0) -> 0
11708 if (N1CFP && N1CFP->isZero())
11709 return N1;
11710 }
11712 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11713 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11714 if (isConstantFPBuildVectorOrConstantFP(N1) &&
11715 N0.getOpcode() == ISD::FMUL) {
11716 SDValue N00 = N0.getOperand(0);
11717 SDValue N01 = N0.getOperand(1);
11718 // Avoid an infinite loop by making sure that N00 is not a constant
11719 // (the inner multiply has not been constant folded yet).
11720 if (isConstantFPBuildVectorOrConstantFP(N01) &&
11721 !isConstantFPBuildVectorOrConstantFP(N00)) {
11722 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11723 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11724 }
11725 }
11727 // Match a special case: we convert X * 2.0 into fadd.
11728 // fmul (fadd X, X), C -> fmul X, 2.0 * C
11729 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11730 N0.getOperand(0) == N0.getOperand(1)) {
11731 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11732 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11733 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11734 }
11735 }
11737 // fold (fmul X, 2.0) -> (fadd X, X)
11738 if (N1CFP && N1CFP->isExactlyValue(+2.0))
11739 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11741 // fold (fmul X, -1.0) -> (fneg X)
11742 if (N1CFP && N1CFP->isExactlyValue(-1.0))
11743 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11744 return DAG.getNode(ISD::FNEG, DL, VT, N0);
11746 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11747 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
11748 ForCodeSize)) {
11749 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
11750 ForCodeSize)) {
11751 // Both can be negated for free, check to see if at least one is cheaper
11752 // negated.
11753 if (LHSNeg == 2 || RHSNeg == 2)
11754 return DAG.getNode(ISD::FMUL, DL, VT,
11755 GetNegatedExpression(N0, DAG, LegalOperations,
11756 ForCodeSize),
11757 GetNegatedExpression(N1, DAG, LegalOperations,
11758 ForCodeSize),
11759 Flags);
11760 }
11761 }
11763 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11764 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11765 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11766 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11767 TLI.isOperationLegal(ISD::FABS, VT)) {
11768 SDValue Select = N0, X = N1;
11769 if (Select.getOpcode() != ISD::SELECT)
11770 std::swap(Select, X);
11772 SDValue Cond = Select.getOperand(0);
11773 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11774 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11776 if (TrueOpnd && FalseOpnd &&
11777 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11778 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11779 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11780 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11781 switch (CC) {
11782 default: break;
11783 case ISD::SETOLT:
11784 case ISD::SETULT:
11785 case ISD::SETOLE:
11786 case ISD::SETULE:
11787 case ISD::SETLT:
11788 case ISD::SETLE:
11789 std::swap(TrueOpnd, FalseOpnd);
11790 LLVM_FALLTHROUGH;
11791 case ISD::SETOGT:
11792 case ISD::SETUGT:
11793 case ISD::SETOGE:
11794 case ISD::SETUGE:
11795 case ISD::SETGT:
11796 case ISD::SETGE:
11797 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11798 TLI.isOperationLegal(ISD::FNEG, VT))
11799 return DAG.getNode(ISD::FNEG, DL, VT,
11800 DAG.getNode(ISD::FABS, DL, VT, X));
11801 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11802 return DAG.getNode(ISD::FABS, DL, VT, X);
11804 break;
11805 }
11806 }
11807 }
11809 // FMUL -> FMA combines:
11810 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11811 AddToWorklist(Fused.getNode());
11812 return Fused;
11813 }
11815 return SDValue();
11816 }
11818 SDValue DAGCombiner::visitFMA(SDNode *N) {
11819 SDValue N0 = N->getOperand(0);
11820 SDValue N1 = N->getOperand(1);
11821 SDValue N2 = N->getOperand(2);
11822 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11823 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11824 EVT VT = N->getValueType(0);
11825 SDLoc DL(N);
11826 const TargetOptions &Options = DAG.getTarget().Options;
11828 // FMA nodes have flags that propagate to the created nodes.
11829 const SDNodeFlags Flags = N->getFlags();
11830 bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11832 // Constant fold FMA.
11833 if (isa<ConstantFPSDNode>(N0) &&
11834 isa<ConstantFPSDNode>(N1) &&
11835 isa<ConstantFPSDNode>(N2)) {
11836 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11837 }
11839 if (UnsafeFPMath) {
11840 if (N0CFP && N0CFP->isZero())
11841 return N2;
11842 if (N1CFP && N1CFP->isZero())
11843 return N2;
11845 // TODO: The FMA node should have flags that propagate to these nodes.
11846 if (N0CFP && N0CFP->isExactlyValue(1.0))
11847 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11848 if (N1CFP && N1CFP->isExactlyValue(1.0))
11849 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11850 }
11851 // Canonicalize (fma c, x, y) -> (fma x, c, y)
11852 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11853 !isConstantFPBuildVectorOrConstantFP(N1))
11854 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11856 if (UnsafeFPMath) {
11857 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11858 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11859 isConstantFPBuildVectorOrConstantFP(N1) &&
11860 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11861 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11862 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11863 Flags), Flags);
11864 }
11866 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11867 if (N0.getOpcode() == ISD::FMUL &&
11868 isConstantFPBuildVectorOrConstantFP(N1) &&
11869 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11870 return DAG.getNode(ISD::FMA, DL, VT,
11871 N0.getOperand(0),
11872 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11873 Flags),
11874 N2);
11875 }
11876 }
11878 // (fma x, 1, y) -> (fadd x, y)
11879 // (fma x, -1, y) -> (fadd (fneg x), y)
11880 if (N1CFP) {
11881 if (N1CFP->isExactlyValue(1.0))
11882 // TODO: The FMA node should have flags that propagate to this node.
11883 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11885 if (N1CFP->isExactlyValue(-1.0) &&
11886 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11887 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11888 AddToWorklist(RHSNeg.getNode());
11889 // TODO: The FMA node should have flags that propagate to this node.
11890 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11891 }
11893 // fma (fneg x), K, y -> fma x, -K, y
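// (Negating the constant is expected to be free here: either ConstantFP is
// legal, or the single-use immediate has to be materialized anyway.)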
11894 if (N0.getOpcode() == ISD::FNEG &&
11895 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11896 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
11897 ForCodeSize)))) {
11898 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11899 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11900 }
11901 }
11903 if (UnsafeFPMath) {
11904 // (fma x, c, x) -> (fmul x, (c+1))
11905 if (N1CFP && N0 == N2) {
11906 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11907 DAG.getNode(ISD::FADD, DL, VT, N1,
11908 DAG.getConstantFP(1.0, DL, VT), Flags),
11909 Flags);
11910 }
11912 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11913 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11914 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11915 DAG.getNode(ISD::FADD, DL, VT, N1,
11916 DAG.getConstantFP(-1.0, DL, VT), Flags),
11917 Flags);
11918 }
11919 }
11921 return SDValue();
11922 }
11924 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11925 // reciprocal.
11926 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11927 // Notice that this is not always beneficial. One reason is different targets
11928 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11929 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11930 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11931 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11932 // TODO: Limit this transform based on optsize/minsize - it always creates at
11933 // least 1 extra instruction. But the perf win may be substantial enough
11934 // that only minsize should restrict this.
11935 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11936 const SDNodeFlags Flags = N->getFlags();
11937 if (!UnsafeMath && !Flags.hasAllowReciprocal())
11938 return SDValue();
11940 // Skip if current node is a reciprocal.
11941 SDValue N0 = N->getOperand(0);
11942 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
11943 if (N0CFP && N0CFP->isExactlyValue(1.0))
11944 return SDValue();
11946 // Exit early if the target does not want this transform or if there can't
11947 // possibly be enough uses of the divisor to make the transform worthwhile.
11948 SDValue N1 = N->getOperand(1);
11949 unsigned MinUses = TLI.combineRepeatedFPDivisors();
11951 // For splat vectors, scale the number of uses by the splat factor. If we can
11952 // convert the division into a scalar op, that will likely be much faster.
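// e.g., a single v4f32 FDIV by a splatted divisor counts as four uses here.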
11953 unsigned NumElts = 1;
11954 EVT VT = N->getValueType(0);
11955 if (VT.isVector() && DAG.isSplatValue(N1))
11956 NumElts = VT.getVectorNumElements();
11958 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
11959 return SDValue();
11961 // Find all FDIV users of the same divisor.
11962 // Use a set because duplicates may be present in the user list.
11963 SetVector<SDNode *> Users;
11964 for (auto *U : N1->uses()) {
11965 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11966 // This division is eligible for optimization only if global unsafe math
11967 // is enabled or if this division allows reciprocal formation.
11968 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11969 Users.insert(U);
11970 }
11971 }
11973 // Now that we have the actual number of divisor uses, make sure it meets
11974 // the minimum threshold specified by the target.
11975 if ((Users.size() * NumElts) < MinUses)
11976 return SDValue();
11978 SDLoc DL(N);
11979 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11980 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11982 // Dividend / Divisor -> Dividend * Reciprocal
11983 for (auto *U : Users) {
11984 SDValue Dividend = U->getOperand(0);
11985 if (Dividend != FPOne) {
11986 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11987 Reciprocal, Flags);
11988 CombineTo(U, NewNode);
11989 } else if (U != Reciprocal.getNode()) {
11990 // In the absence of fast-math-flags, this user node is always the
11991 // same node as Reciprocal, but with FMF they may be different nodes.
11992 CombineTo(U, Reciprocal);
11993 }
11994 }
11995 return SDValue(N, 0); // N was replaced.
11996 }
11998 SDValue DAGCombiner::visitFDIV(SDNode *N) {
11999 SDValue N0 = N->getOperand(0);
12000 SDValue N1 = N->getOperand(1);
12001 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12002 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12003 EVT VT = N->getValueType(0);
12004 SDLoc DL(N);
12005 const TargetOptions &Options = DAG.getTarget().Options;
12006 SDNodeFlags Flags = N->getFlags();
12008 // fold vector ops
12009 if (VT.isVector())
12010 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12011 return FoldedVOp;
12013 // fold (fdiv c1, c2) -> c1/c2
12014 if (N0CFP && N1CFP)
12015 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12017 if (SDValue NewSel = foldBinOpIntoSelect(N))
12018 return NewSel;
12020 if (SDValue V = combineRepeatedFPDivisors(N))
12021 return V;
12023 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12024 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
12025 if (N1CFP) {
12026 // Compute the reciprocal 1.0 / c2.
12027 const APFloat &N1APF = N1CFP->getValueAPF();
12028 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12029 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12030 // Only do the transform if the reciprocal is a legal fp immediate that
12031 // isn't too nasty (e.g., NaN, denormal, ...).
12032 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12033 (!LegalOperations ||
12034 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12035 // backend)... we should handle this gracefully after Legalize.
12036 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
12037 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12038 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12039 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12040 DAG.getConstantFP(Recip, DL, VT), Flags);
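// For example, (fdiv X, 2.0) becomes (fmul X, 0.5), since 0.5 is exact; an
// inexact reciprocal such as 1.0/3.0 also qualifies (opInexact is accepted
// above), while one that underflows to a denormal is rejected.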
12043 // If this FDIV is part of a reciprocal square root, it may be folded
12044 // into a target-specific square root estimate instruction.
12045 if (N1.getOpcode() == ISD::FSQRT) {
12046 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
12047 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12049 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12050 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12051 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12052 Flags)) {
12053 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12054 AddToWorklist(RV.getNode());
12055 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12057 } else if (N1.getOpcode() == ISD::FP_ROUND &&
12058 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12059 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12060 Flags)) {
12061 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12062 AddToWorklist(RV.getNode());
12063 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12065 } else if (N1.getOpcode() == ISD::FMUL) {
12066 // Look through an FMUL. Even though this won't remove the FDIV directly,
12067 // it's still worthwhile to get rid of the FSQRT if possible.
12068 SDValue SqrtOp;
12069 SDValue OtherOp;
12070 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12071 SqrtOp = N1.getOperand(0);
12072 OtherOp = N1.getOperand(1);
12073 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12074 SqrtOp = N1.getOperand(1);
12075 OtherOp = N1.getOperand(0);
12077 if (SqrtOp.getNode()) {
12078 // We found an FSQRT, so try to make this fold:
12079 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12080 if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12081 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12082 AddToWorklist(RV.getNode());
12083 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12088 // Fold into a reciprocal estimate and multiply instead of a real divide.
12089 if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
12090 AddToWorklist(RV.getNode());
12091 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12095 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12096 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
12097 ForCodeSize)) {
12098 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
12099 ForCodeSize)) {
12100 // Both can be negated for free; check whether at least one is cheaper
12101 // when negated.
12102 if (LHSNeg == 2 || RHSNeg == 2)
12103 return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
12104 GetNegatedExpression(N0, DAG, LegalOperations,
12105 ForCodeSize),
12106 GetNegatedExpression(N1, DAG, LegalOperations,
12107 ForCodeSize),
12108 Flags);
12112 return SDValue();
12115 SDValue DAGCombiner::visitFREM(SDNode *N) {
12116 SDValue N0 = N->getOperand(0);
12117 SDValue N1 = N->getOperand(1);
12118 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12119 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12120 EVT VT = N->getValueType(0);
12122 // fold (frem c1, c2) -> fmod(c1,c2)
12123 if (N0CFP && N1CFP)
12124 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12126 if (SDValue NewSel = foldBinOpIntoSelect(N))
12127 return NewSel;
12129 return SDValue();
12132 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12133 SDNodeFlags Flags = N->getFlags();
12134 if (!DAG.getTarget().Options.UnsafeFPMath &&
12135 !Flags.hasApproximateFuncs())
12136 return SDValue();
12138 SDValue N0 = N->getOperand(0);
12139 if (TLI.isFsqrtCheap(N0, DAG))
12140 return SDValue();
12142 // FSQRT nodes have flags that propagate to the created nodes.
12143 return buildSqrtEstimate(N0, Flags);
12146 /// copysign(x, fp_extend(y)) -> copysign(x, y)
12147 /// copysign(x, fp_round(y)) -> copysign(x, y)
12148 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
12149 SDValue N1 = N->getOperand(1);
12150 if ((N1.getOpcode() == ISD::FP_EXTEND ||
12151 N1.getOpcode() == ISD::FP_ROUND)) {
12152 // Do not optimize out the type conversion of f128 values yet.
12153 // For some targets like x86_64, the configuration keeps one f128
12154 // value in one SSE register, but instruction selection cannot handle
12155 // FCOPYSIGN on SSE registers yet.
12156 EVT N1VT = N1->getValueType(0);
12157 EVT N1Op0VT = N1->getOperand(0).getValueType();
12158 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12160 return false;
12163 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
12164 SDValue N0 = N->getOperand(0);
12165 SDValue N1 = N->getOperand(1);
12166 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12167 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12168 EVT VT = N->getValueType(0);
12170 if (N0CFP && N1CFP) // Constant fold
12171 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
12173 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
12174 const APFloat &V = N1C->getValueAPF();
12175 // copysign(x, c1) -> fabs(x) iff ispos(c1)
12176 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
12177 if (!V.isNegative()) {
12178 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
12179 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12180 } else {
12181 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12182 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
12183 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
12187 // copysign(fabs(x), y) -> copysign(x, y)
12188 // copysign(fneg(x), y) -> copysign(x, y)
12189 // copysign(copysign(x,z), y) -> copysign(x, y)
12190 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
12191 N0.getOpcode() == ISD::FCOPYSIGN)
12192 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
12194 // copysign(x, abs(y)) -> abs(x)
12195 if (N1.getOpcode() == ISD::FABS)
12196 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12198 // copysign(x, copysign(y,z)) -> copysign(x, z)
12199 if (N1.getOpcode() == ISD::FCOPYSIGN)
12200 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
12202 // copysign(x, fp_extend(y)) -> copysign(x, y)
12203 // copysign(x, fp_round(y)) -> copysign(x, y)
12204 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
12205 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
12207 return SDValue();
12210 SDValue DAGCombiner::visitFPOW(SDNode *N) {
12211 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
12212 if (!ExponentC)
12213 return SDValue();
12215 // Try to convert x ** (1/3) into cube root.
12216 // TODO: Handle the various flavors of long double.
12217 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
12218 // Some range near 1/3 should be fine.
12219 EVT VT = N->getValueType(0);
12220 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
12221 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
12222 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
12223 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
12224 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
12225 // For regular numbers, rounding may cause the results to differ.
12226 // Therefore, we require { nsz ninf nnan afn } for this transform.
12227 // TODO: We could select out the special cases if we don't have nsz/ninf.
12228 SDNodeFlags Flags = N->getFlags();
12229 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
12230 !Flags.hasApproximateFuncs())
12231 return SDValue();
12233 // Do not create a cbrt() libcall if the target does not have it, and do not
12234 // turn a pow that has lowering support into a cbrt() libcall.
12235 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
12236 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
12237 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
12238 return SDValue();
12240 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12243 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12244 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12245 // TODO: This could be extended (using a target hook) to handle smaller
12246 // power-of-2 fractional exponents.
12247 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12248 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12249 if (ExponentIs025 || ExponentIs075) {
12250 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12251 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
12252 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12253 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
12254 // For regular numbers, rounding may cause the results to differ.
12255 // Therefore, we require { nsz ninf afn } for this transform.
12256 // TODO: We could select out the special cases if we don't have nsz/ninf.
12257 SDNodeFlags Flags = N->getFlags();
12259 // We only need no signed zeros for the 0.25 case.
12260 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12261 !Flags.hasApproximateFuncs())
12262 return SDValue();
12264 // Don't double the number of libcalls. We are trying to inline fast code.
12265 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12266 return SDValue();
12268 // Assume that libcalls are the smallest code.
12269 // TODO: This restriction should probably be lifted for vectors.
12270 if (DAG.getMachineFunction().getFunction().hasOptSize())
12271 return SDValue();
12273 // pow(X, 0.25) --> sqrt(sqrt(X))
12274 SDLoc DL(N);
12275 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12276 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12277 if (ExponentIs025)
12278 return SqrtSqrt;
12279 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12280 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12283 return SDValue();
12286 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12287 const TargetLowering &TLI) {
12288 // This optimization is guarded by a function attribute because it may produce
12289 // unexpected results. I.e., programs may be relying on the platform-specific
12290 // undefined behavior when the float-to-int conversion overflows.
12291 const Function &F = DAG.getMachineFunction().getFunction();
12292 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12293 if (StrictOverflow.getValueAsString().equals("false"))
12294 return SDValue();
12296 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12297 // replacing casts with a libcall. We also must be allowed to ignore -0.0
12298 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12299 // conversions would return +0.0.
12300 // FIXME: We should be able to use node-level FMF here.
12301 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12302 EVT VT = N->getValueType(0);
12303 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12304 !DAG.getTarget().Options.NoSignedZerosFPMath)
12305 return SDValue();
12307 // fptosi/fptoui round towards zero, so converting from FP to integer and
12308 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12309 SDValue N0 = N->getOperand(0);
12310 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12311 N0.getOperand(0).getValueType() == VT)
12312 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12314 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12315 N0.getOperand(0).getValueType() == VT)
12316 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12318 return SDValue();
12321 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12322 SDValue N0 = N->getOperand(0);
12323 EVT VT = N->getValueType(0);
12324 EVT OpVT = N0.getValueType();
12326 // fold (sint_to_fp c1) -> c1fp
12327 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12328 // ...but only if the target supports immediate floating-point values
12329 (!LegalOperations ||
12330 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12331 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12333 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12334 // but UINT_TO_FP is legal on this target, try to convert.
12335 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12336 hasOperation(ISD::UINT_TO_FP, OpVT)) {
12337 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12338 if (DAG.SignBitIsZero(N0))
12339 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12342 // The next optimizations are desirable only if SELECT_CC can be lowered.
12343 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12344 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
12345 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12346 !VT.isVector() &&
12347 (!LegalOperations ||
12348 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12349 SDLoc DL(N);
12350 SDValue Ops[] =
12351 { N0.getOperand(0), N0.getOperand(1),
12352 DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12353 N0.getOperand(2) };
12354 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12357 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12358 // (select_cc x, y, 1.0, 0.0, cc)
12359 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12360 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
12361 (!LegalOperations ||
12362 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12363 SDLoc DL(N);
12364 SDValue Ops[] =
12365 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12366 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12367 N0.getOperand(0).getOperand(2) };
12368 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12372 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12373 return FTrunc;
12375 return SDValue();
12378 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12379 SDValue N0 = N->getOperand(0);
12380 EVT VT = N->getValueType(0);
12381 EVT OpVT = N0.getValueType();
12383 // fold (uint_to_fp c1) -> c1fp
12384 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12385 // ...but only if the target supports immediate floating-point values
12386 (!LegalOperations ||
12387 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12388 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12390 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12391 // but SINT_TO_FP is legal on this target, try to convert.
12392 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12393 hasOperation(ISD::SINT_TO_FP, OpVT)) {
12394 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12395 if (DAG.SignBitIsZero(N0))
12396 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12399 // The next optimizations are desirable only if SELECT_CC can be lowered.
12400 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12401 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
12402 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12403 (!LegalOperations ||
12404 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12405 SDLoc DL(N);
12406 SDValue Ops[] =
12407 { N0.getOperand(0), N0.getOperand(1),
12408 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12409 N0.getOperand(2) };
12410 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12414 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12415 return FTrunc;
12417 return SDValue();
12420 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
12421 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12422 SDValue N0 = N->getOperand(0);
12423 EVT VT = N->getValueType(0);
12425 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12426 return SDValue();
12428 SDValue Src = N0.getOperand(0);
12429 EVT SrcVT = Src.getValueType();
12430 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12431 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12433 // We can safely assume the conversion won't overflow the output range,
12434 // because (for example) (uint8_t)18293.f is undefined behavior.
12436 // Since we can assume the conversion won't overflow, our decision as to
12437 // whether the input will fit in the float should depend on the minimum
12438 // of the input range and output range.
12440 // This means this is also safe for a signed input and unsigned output, since
12441 // a negative input would lead to undefined behavior.
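// Worked example (a sketch): in the computation below, a signed
// i16 -> f32 -> i16 round trip has InputSize = OutputSize = 15, and f32
// provides 24 bits of precision, so the trip is exact and folds away; a
// signed i32 -> f32 -> i32 round trip has ActualSize = 31 > 24, so we bail.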
12442 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12443 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12444 unsigned ActualSize = std::min(InputSize, OutputSize);
12445 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12447 // We can only fold away the float conversion if the input range can be
12448 // represented exactly in the float range.
12449 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12450 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12451 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12452 : ISD::ZERO_EXTEND;
12453 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12455 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12456 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12457 return DAG.getBitcast(VT, Src);
12459 return SDValue();
12462 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12463 SDValue N0 = N->getOperand(0);
12464 EVT VT = N->getValueType(0);
12466 // fold (fp_to_sint c1fp) -> c1
12467 if (isConstantFPBuildVectorOrConstantFP(N0))
12468 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12470 return FoldIntToFPToInt(N, DAG);
12473 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12474 SDValue N0 = N->getOperand(0);
12475 EVT VT = N->getValueType(0);
12477 // fold (fp_to_uint c1fp) -> c1
12478 if (isConstantFPBuildVectorOrConstantFP(N0))
12479 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12481 return FoldIntToFPToInt(N, DAG);
12484 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12485 SDValue N0 = N->getOperand(0);
12486 SDValue N1 = N->getOperand(1);
12487 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12488 EVT VT = N->getValueType(0);
12490 // fold (fp_round c1fp) -> c1fp
12491 if (N0CFP)
12492 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12494 // fold (fp_round (fp_extend x)) -> x
12495 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12496 return N0.getOperand(0);
12498 // fold (fp_round (fp_round x)) -> (fp_round x)
12499 if (N0.getOpcode() == ISD::FP_ROUND) {
12500 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12501 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12503 // Skip this folding if it results in an fp_round from f80 to f16.
12505 // f80 to f16 always generates an expensive (and as yet, unimplemented)
12506 // libcall to __truncxfhf2 instead of selecting native f16 conversion
12507 // instructions from f32 or f64. Moreover, the first (value-preserving)
12508 // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
12509 // x86.
12510 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12511 return SDValue();
12513 // If the first fp_round isn't a value-preserving truncation, it might
12514 // introduce a tie in the second fp_round, which wouldn't occur in the
12515 // single-step fp_round we want to fold to.
12516 // In other words, double rounding isn't the same as rounding.
12517 // Also, this is a value-preserving truncation iff both fp_rounds are.
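// Illustrative example: a value that the first (inexact) fp_round nudges
// exactly onto a tie point of the narrower type can be rounded up again by
// the second fp_round, while a single direct rounding would round down.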
12518 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
12519 SDLoc DL(N);
12520 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
12521 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
12525 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
12526 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
12527 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
12528 N0.getOperand(0), N1);
12529 AddToWorklist(Tmp.getNode());
12530 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
12531 Tmp, N0.getOperand(1));
12534 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12535 return NewVSel;
12537 return SDValue();
12540 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
12541 SDValue N0 = N->getOperand(0);
12542 EVT VT = N->getValueType(0);
12543 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12544 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12546 // fold (fp_round_inreg c1fp) -> c1fp
12547 if (N0CFP && isTypeLegal(EVT)) {
12548 SDLoc DL(N);
12549 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
12550 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
12553 return SDValue();
12556 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
12557 SDValue N0 = N->getOperand(0);
12558 EVT VT = N->getValueType(0);
12560 // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
12561 if (N->hasOneUse() &&
12562 N->use_begin()->getOpcode() == ISD::FP_ROUND)
12563 return SDValue();
12565 // fold (fp_extend c1fp) -> c1fp
12566 if (isConstantFPBuildVectorOrConstantFP(N0))
12567 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
12569 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
12570 if (N0.getOpcode() == ISD::FP16_TO_FP &&
12571 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
12572 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
12574 // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
12575 // value of X.
12576 if (N0.getOpcode() == ISD::FP_ROUND
12577 && N0.getConstantOperandVal(1) == 1) {
12578 SDValue In = N0.getOperand(0);
12579 if (In.getValueType() == VT) return In;
12580 if (VT.bitsLT(In.getValueType()))
12581 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
12582 In, N0.getOperand(1));
12583 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
12586 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
12587 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12588 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12589 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12590 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12591 LN0->getChain(),
12592 LN0->getBasePtr(), N0.getValueType(),
12593 LN0->getMemOperand());
12594 CombineTo(N, ExtLoad);
12595 CombineTo(N0.getNode(),
12596 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
12597 N0.getValueType(), ExtLoad,
12598 DAG.getIntPtrConstant(1, SDLoc(N0))),
12599 ExtLoad.getValue(1));
12600 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12603 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12604 return NewVSel;
12606 return SDValue();
12609 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12610 SDValue N0 = N->getOperand(0);
12611 EVT VT = N->getValueType(0);
12613 // fold (fceil c1) -> fceil(c1)
12614 if (isConstantFPBuildVectorOrConstantFP(N0))
12615 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12617 return SDValue();
12620 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12621 SDValue N0 = N->getOperand(0);
12622 EVT VT = N->getValueType(0);
12624 // fold (ftrunc c1) -> ftrunc(c1)
12625 if (isConstantFPBuildVectorOrConstantFP(N0))
12626 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12628 // fold ftrunc (known rounded int x) -> x
12629 // ftrunc is a part of the fptosi/fptoui expansion on some targets, so it is
12630 // likely to be generated to extract an integer from a rounded floating value.
12631 switch (N0.getOpcode()) {
12632 default: break;
12633 case ISD::FRINT:
12634 case ISD::FTRUNC:
12635 case ISD::FNEARBYINT:
12636 case ISD::FFLOOR:
12637 case ISD::FCEIL:
12638 return N0;
12641 return SDValue();
12644 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
12645 SDValue N0 = N->getOperand(0);
12646 EVT VT = N->getValueType(0);
12648 // fold (ffloor c1) -> ffloor(c1)
12649 if (isConstantFPBuildVectorOrConstantFP(N0))
12650 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12652 return SDValue();
12655 // FIXME: FNEG and FABS have a lot in common; refactor.
12656 SDValue DAGCombiner::visitFNEG(SDNode *N) {
12657 SDValue N0 = N->getOperand(0);
12658 EVT VT = N->getValueType(0);
12660 // Constant fold FNEG.
12661 if (isConstantFPBuildVectorOrConstantFP(N0))
12662 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
12664 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
12665 &DAG.getTarget().Options, ForCodeSize))
12666 return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12668 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
12669 // constant pool values.
12670 if (!TLI.isFNegFree(VT) &&
12671 N0.getOpcode() == ISD::BITCAST &&
12672 N0.getNode()->hasOneUse()) {
12673 SDValue Int = N0.getOperand(0);
12674 EVT IntVT = Int.getValueType();
12675 if (IntVT.isInteger() && !IntVT.isVector()) {
12676 APInt SignMask;
12677 if (N0.getValueType().isVector()) {
12678 // For a vector, get a mask such as 0x80... per scalar element
12679 // and splat it.
12680 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
12681 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12682 } else {
12683 // For a scalar, just generate 0x80...
12684 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
12686 SDLoc DL0(N0);
12687 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
12688 DAG.getConstant(SignMask, DL0, IntVT));
12689 AddToWorklist(Int.getNode());
12690 return DAG.getBitcast(VT, Int);
12694 // (fneg (fmul c, x)) -> (fmul -c, x)
12695 if (N0.getOpcode() == ISD::FMUL &&
12696 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
12697 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
12698 if (CFP1) {
12699 APFloat CVal = CFP1->getValueAPF();
12700 CVal.changeSign();
12701 if (Level >= AfterLegalizeDAG &&
12702 (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
12703 TLI.isOperationLegal(ISD::ConstantFP, VT)))
12704 return DAG.getNode(
12705 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
12706 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
12707 N0->getFlags());
12711 return SDValue();
12714 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
12715 APFloat (*Op)(const APFloat &, const APFloat &)) {
12716 SDValue N0 = N->getOperand(0);
12717 SDValue N1 = N->getOperand(1);
12718 EVT VT = N->getValueType(0);
12719 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12720 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12722 if (N0CFP && N1CFP) {
12723 const APFloat &C0 = N0CFP->getValueAPF();
12724 const APFloat &C1 = N1CFP->getValueAPF();
12725 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
12728 // Canonicalize to constant on RHS.
12729 if (isConstantFPBuildVectorOrConstantFP(N0) &&
12730 !isConstantFPBuildVectorOrConstantFP(N1))
12731 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
12733 return SDValue();
12736 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
12737 return visitFMinMax(DAG, N, minnum);
12740 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
12741 return visitFMinMax(DAG, N, maxnum);
12744 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
12745 return visitFMinMax(DAG, N, minimum);
12748 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
12749 return visitFMinMax(DAG, N, maximum);
12752 SDValue DAGCombiner::visitFABS(SDNode *N) {
12753 SDValue N0 = N->getOperand(0);
12754 EVT VT = N->getValueType(0);
12756 // fold (fabs c1) -> fabs(c1)
12757 if (isConstantFPBuildVectorOrConstantFP(N0))
12758 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12760 // fold (fabs (fabs x)) -> (fabs x)
12761 if (N0.getOpcode() == ISD::FABS)
12762 return N->getOperand(0);
12764 // fold (fabs (fneg x)) -> (fabs x)
12765 // fold (fabs (fcopysign x, y)) -> (fabs x)
12766 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12767 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12769 // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
12770 if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12771 SDValue Int = N0.getOperand(0);
12772 EVT IntVT = Int.getValueType();
12773 if (IntVT.isInteger() && !IntVT.isVector()) {
12774 APInt SignMask;
12775 if (N0.getValueType().isVector()) {
12776 // For a vector, get a mask such as 0x7f... per scalar element
12777 // and splat it.
12778 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12779 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12780 } else {
12781 // For a scalar, just generate 0x7f...
12782 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12784 SDLoc DL(N0);
12785 Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12786 DAG.getConstant(SignMask, DL, IntVT));
12787 AddToWorklist(Int.getNode());
12788 return DAG.getBitcast(N->getValueType(0), Int);
12792 return SDValue();
12795 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12796 SDValue Chain = N->getOperand(0);
12797 SDValue N1 = N->getOperand(1);
12798 SDValue N2 = N->getOperand(2);
12800 // If N is a constant we could fold this into a fallthrough or unconditional
12801 // branch. However that doesn't happen very often in normal code, because
12802 // Instcombine/SimplifyCFG should have handled the available opportunities.
12803 // If we did this folding here, it would be necessary to update the
12804 // MachineBasicBlock CFG, which is awkward.
12806 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12807 // on the target.
12808 if (N1.getOpcode() == ISD::SETCC &&
12809 TLI.isOperationLegalOrCustom(ISD::BR_CC,
12810 N1.getOperand(0).getValueType())) {
12811 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12812 Chain, N1.getOperand(2),
12813 N1.getOperand(0), N1.getOperand(1), N2);
12816 if (N1.hasOneUse()) {
12817 if (SDValue NewN1 = rebuildSetCC(N1))
12818 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12821 return SDValue();
12824 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12825 if (N.getOpcode() == ISD::SRL ||
12826 (N.getOpcode() == ISD::TRUNCATE &&
12827 (N.getOperand(0).hasOneUse() &&
12828 N.getOperand(0).getOpcode() == ISD::SRL))) {
12829 // Look past the truncate.
12830 if (N.getOpcode() == ISD::TRUNCATE)
12831 N = N.getOperand(0);
12833 // Match this pattern so that we can generate simpler code:
12835 // %a = ...
12836 // %b = and i32 %a, 2
12837 // %c = srl i32 %b, 1
12838 // brcond i32 %c ...
12840 // into
12842 // %a = ...
12843 // %b = and i32 %a, 2
12844 // %c = setcc eq %b, 0
12845 // brcond %c ...
12847 // This applies only when the AND constant value has one bit set and the
12848 // SRL constant is equal to the log2 of the AND constant. The back-end is
12849 // smart enough to convert the result into a TEST/JMP sequence.
12850 SDValue Op0 = N.getOperand(0);
12851 SDValue Op1 = N.getOperand(1);
12853 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12854 SDValue AndOp1 = Op0.getOperand(1);
12856 if (AndOp1.getOpcode() == ISD::Constant) {
12857 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12859 if (AndConst.isPowerOf2() &&
12860 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12861 SDLoc DL(N);
12862 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12863 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12864 ISD::SETNE);
12870 // Transform br(xor(x, y)) -> br(x != y)
12871 // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12872 if (N.getOpcode() == ISD::XOR) {
12873 // Because we may call this on a speculatively constructed
12874 // SimplifiedSetCC Node, we need to simplify this node first.
12875 // Ideally this should be folded into SimplifySetCC and not
12876 // here. For now, grab a handle to N so we don't lose it from
12877 // replacements internal to the visit.
12878 HandleSDNode XORHandle(N);
12879 while (N.getOpcode() == ISD::XOR) {
12880 SDValue Tmp = visitXOR(N.getNode());
12881 // No simplification done.
12882 if (!Tmp.getNode())
12883 break;
12884 // Returning N is a form of in-visit replacement that may invalidate
12885 // N. Grab the value from the handle.
12886 if (Tmp.getNode() == N.getNode())
12887 N = XORHandle.getValue();
12888 else // Node simplified. Try simplifying again.
12889 N = Tmp;
12892 if (N.getOpcode() != ISD::XOR)
12893 return N;
12895 SDNode *TheXor = N.getNode();
12897 SDValue Op0 = TheXor->getOperand(0);
12898 SDValue Op1 = TheXor->getOperand(1);
12900 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12901 bool Equal = false;
12902 if (isOneConstant(Op0) && Op0.hasOneUse() &&
12903 Op0.getOpcode() == ISD::XOR) {
12904 TheXor = Op0.getNode();
12905 Equal = true;
12908 EVT SetCCVT = N.getValueType();
12909 if (LegalTypes)
12910 SetCCVT = getSetCCResultType(SetCCVT);
12911 // Replace the uses of XOR with SETCC
12912 return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12913 Equal ? ISD::SETEQ : ISD::SETNE);
12917 return SDValue();
12920 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12922 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12923 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12924 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12926 // If N is a constant we could fold this into a fallthrough or unconditional
12927 // branch. However that doesn't happen very often in normal code, because
12928 // Instcombine/SimplifyCFG should have handled the available opportunities.
12929 // If we did this folding here, it would be necessary to update the
12930 // MachineBasicBlock CFG, which is awkward.
12932 // Use SimplifySetCC to simplify SETCC's.
12933 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12934 CondLHS, CondRHS, CC->get(), SDLoc(N),
12935 false);
12936 if (Simp.getNode()) AddToWorklist(Simp.getNode());
12938 // fold to a simpler setcc
12939 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12940 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12941 N->getOperand(0), Simp.getOperand(2),
12942 Simp.getOperand(0), Simp.getOperand(1),
12943 N->getOperand(4));
12945 return SDValue();
12948 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12949 /// and that N may be folded in the load / store addressing mode.
12950 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12951 SelectionDAG &DAG,
12952 const TargetLowering &TLI) {
12953 EVT VT;
12954 unsigned AS;
12956 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
12957 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12958 return false;
12959 VT = LD->getMemoryVT();
12960 AS = LD->getAddressSpace();
12961 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
12962 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12963 return false;
12964 VT = ST->getMemoryVT();
12965 AS = ST->getAddressSpace();
12966 } else
12967 return false;
12969 TargetLowering::AddrMode AM;
12970 if (N->getOpcode() == ISD::ADD) {
12971 AM.HasBaseReg = true;
12972 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12973 if (Offset)
12974 // [reg +/- imm]
12975 AM.BaseOffs = Offset->getSExtValue();
12976 else
12977 // [reg +/- reg]
12978 AM.Scale = 1;
12979 } else if (N->getOpcode() == ISD::SUB) {
12980 AM.HasBaseReg = true;
12981 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12982 if (Offset)
12983 // [reg +/- imm]
12984 AM.BaseOffs = -Offset->getSExtValue();
12985 else
12986 // [reg +/- reg]
12987 AM.Scale = 1;
12988 } else
12989 return false;
12991 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12992 VT.getTypeForEVT(*DAG.getContext()), AS);
12995 /// Try turning a load/store into a pre-indexed load/store when the base
12996 /// pointer is an add or subtract and it has other uses besides the load/store.
12997 /// After the transformation, the new indexed load/store has effectively folded
12998 /// the add/subtract in and all of its other uses are redirected to the
12999 /// new load/store.
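/// For instance (assuming an ARM/AArch64-like target with pre-indexed
/// addressing), "add x1, x1, #8" followed by "ldr x0, [x1]" can become the
/// single pre-indexed load "ldr x0, [x1, #8]!", which produces both the
/// loaded value and the updated pointer.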
13000 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
13001 if (Level < AfterLegalizeDAG)
13002 return false;
13004 bool isLoad = true;
13005 SDValue Ptr;
13006 EVT VT;
13007 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13008 if (LD->isIndexed())
13009 return false;
13010 VT = LD->getMemoryVT();
13011 if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
13012 !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
13013 return false;
13014 Ptr = LD->getBasePtr();
13015 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13016 if (ST->isIndexed())
13017 return false;
13018 VT = ST->getMemoryVT();
13019 if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
13020 !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
13021 return false;
13022 Ptr = ST->getBasePtr();
13023 isLoad = false;
13024 } else {
13025 return false;
13028 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13029 // out. There is no reason to make this a preinc/predec.
13030 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13031 Ptr.getNode()->hasOneUse())
13032 return false;
13034 // Ask the target to do addressing mode selection.
13035 SDValue BasePtr;
13036 SDValue Offset;
13037 ISD::MemIndexedMode AM = ISD::UNINDEXED;
13038 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13039 return false;
13041 // Backends without true r+i pre-indexed forms may need to pass a
13042 // constant base with a variable offset so that constant coercion
13043 // will work with the patterns in canonical form.
13044 bool Swapped = false;
13045 if (isa<ConstantSDNode>(BasePtr)) {
13046 std::swap(BasePtr, Offset);
13047 Swapped = true;
13051 // Don't create an indexed load / store with zero offset.
13051 if (isNullConstant(Offset))
13052 return false;
13054 // Try turning it into a pre-indexed load / store except when:
13055 // 1) The new base ptr is a frame index.
13056 // 2) If N is a store and the new base ptr is either the same as or is a
13057 // predecessor of the value being stored.
13058 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13059 // that would create a cycle.
13060 // 4) All uses are load / store ops that use it as old base ptr.
13062 // Check #1. Preinc'ing a frame index would require copying the stack pointer
13063 // (plus the implicit offset) to a register to preinc anyway.
13064 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13065 return false;
13067 // Check #2.
13068 if (!isLoad) {
13069 SDValue Val = cast<StoreSDNode>(N)->getValue();
13071 // Would require a copy.
13072 if (Val == BasePtr)
13073 return false;
13075 // Would create a cycle.
13076 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13077 return false;
13080 // Caches for hasPredecessorHelper.
13081 SmallPtrSet<const SDNode *, 32> Visited;
13082 SmallVector<const SDNode *, 16> Worklist;
13083 Worklist.push_back(N);
13085 // If the offset is a constant, there may be other adds of constants that
13086 // can be folded with this one. We should do this to avoid having to keep
13087 // a copy of the original base pointer.
13088 SmallVector<SDNode *, 16> OtherUses;
13089 if (isa<ConstantSDNode>(Offset))
13090 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13091 UE = BasePtr.getNode()->use_end();
13092 UI != UE; ++UI) {
13093 SDUse &Use = UI.getUse();
13094 // Skip the use that is Ptr and uses of other results from BasePtr's
13095 // node (important for nodes that return multiple results).
13096 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13097 continue;
13099 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
13100 continue;
13102 if (Use.getUser()->getOpcode() != ISD::ADD &&
13103 Use.getUser()->getOpcode() != ISD::SUB) {
13104 OtherUses.clear();
13105 break;
13108 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
13109 if (!isa<ConstantSDNode>(Op1)) {
13110 OtherUses.clear();
13111 break;
13114 // FIXME: In some cases, we can be smarter about this.
13115 if (Op1.getValueType() != Offset.getValueType()) {
13116 OtherUses.clear();
13117 break;
13120 OtherUses.push_back(Use.getUser());
13123 if (Swapped)
13124 std::swap(BasePtr, Offset);
13126 // Now check for #3 and #4.
13127 bool RealUse = false;
13129 for (SDNode *Use : Ptr.getNode()->uses()) {
13130 if (Use == N)
13131 continue;
13132 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
13133 return false;
13135 // If Ptr can be folded into the addressing mode of every other use, it's
13136 // not profitable to do this transformation.
13137 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
13138 RealUse = true;
13141 if (!RealUse)
13142 return false;
13144 SDValue Result;
13145 if (isLoad)
13146 Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13147 BasePtr, Offset, AM);
13148 else
13149 Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13150 BasePtr, Offset, AM);
13151 ++PreIndexedNodes;
13152 ++NodesCombined;
13153 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
13154 Result.getNode()->dump(&DAG); dbgs() << '\n');
13155 WorklistRemover DeadNodes(*this);
13156 if (isLoad) {
13157 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13158 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13159 } else {
13160 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13163 // Finally, since the node is now dead, remove it from the graph.
13164 deleteAndRecombine(N);
13166 if (Swapped)
13167 std::swap(BasePtr, Offset);
13169 // Replace other uses of BasePtr that can be updated to use Ptr
13170 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
13171 unsigned OffsetIdx = 1;
13172 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
13173 OffsetIdx = 0;
13174 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
13175 BasePtr.getNode() && "Expected BasePtr operand");
13177 // We need to replace ptr0 in the following expression:
13178 // x0 * offset0 + y0 * ptr0 = t0
13179 // knowing that
13180 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
13182 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
13183 // indexed load/store and the expression that needs to be re-written.
13185 // Therefore, we have:
13186 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
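// Worked example (a sketch): if the other use is (add ptr0, 16) and the
// new access is pre-incremented by 4 (so t1 = ptr0 + 4), then every sign
// is +1 and the rewrite below computes t0 = (16 - 4) + t1, i.e. (add t1, 12).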
13188 ConstantSDNode *CN =
13189 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
13190 int X0, X1, Y0, Y1;
13191 const APInt &Offset0 = CN->getAPIntValue();
13192 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
13194 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
13195 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
13196 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
13197 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
13199 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
13201 APInt CNV = Offset0;
13202 if (X0 < 0) CNV = -CNV;
13203 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
13204 else CNV = CNV - Offset1;
13206 SDLoc DL(OtherUses[i]);
13208 // We can now generate the new expression.
13209 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
13210 SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
13212 SDValue NewUse = DAG.getNode(Opcode,
13214 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
13215 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
13216 deleteAndRecombine(OtherUses[i]);
13219 // Replace the uses of Ptr with uses of the updated base value.
13220 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
13221 deleteAndRecombine(Ptr.getNode());
13222 AddToWorklist(Result.getNode());
13224 return true;
13227 /// Try to combine a load/store with an add/sub of the base pointer node into a
13228 /// post-indexed load/store. The transformation effectively folds the add/subtract
13229 /// into the new indexed load/store, and all of its uses are redirected to the
13230 /// new load/store.
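/// For instance (assuming an ARM/AArch64-like target with post-indexed
/// addressing), "ldr x0, [x1]" followed by "add x1, x1, #8" can become the
/// single post-indexed load "ldr x0, [x1], #8".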
13231 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
13232 if (Level < AfterLegalizeDAG)
13233 return false;
13235 bool isLoad = true;
13236 SDValue Ptr;
13237 EVT VT;
13238 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13239 if (LD->isIndexed())
13240 return false;
13241 VT = LD->getMemoryVT();
13242 if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
13243 !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
13244 return false;
13245 Ptr = LD->getBasePtr();
13246 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13247 if (ST->isIndexed())
13248 return false;
13249 VT = ST->getMemoryVT();
13250 if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
13251 !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
13252 return false;
13253 Ptr = ST->getBasePtr();
13254 isLoad = false;
13255 } else {
13256 return false;
13259 if (Ptr.getNode()->hasOneUse())
13260 return false;
13262 for (SDNode *Op : Ptr.getNode()->uses()) {
13263 if (Op == N ||
13264 (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
13265 continue;
13267 SDValue BasePtr;
13268 SDValue Offset;
13269 ISD::MemIndexedMode AM = ISD::UNINDEXED;
13270 if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
13271 // Don't create an indexed load / store with zero offset.
13272 if (isNullConstant(Offset))
13273 continue;
13275 // Try turning it into a post-indexed load / store except when
13276 // 1) All uses are load / store ops that use it as the base ptr (and
13277 // it may be folded into the addressing mode).
13278 // 2) Op must be independent of N, i.e. Op is neither a predecessor
13279 // nor a successor of N. Otherwise, if Op is folded that would
13280 // create a cycle.
13282 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13283 continue;
13285 // Check for #1.
13286 bool TryNext = false;
13287 for (SDNode *Use : BasePtr.getNode()->uses()) {
13288 if (Use == Ptr.getNode())
13289 continue;
13291 // If all the uses are load / store addresses, then don't do the
13292 // transformation.
13293 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
13294 bool RealUse = false;
13295 for (SDNode *UseUse : Use->uses()) {
13296 if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
13297 RealUse = true;
13300 if (!RealUse) {
13301 TryNext = true;
13302 break;
13307 if (TryNext)
13308 continue;
13310 // Check for #2.
13311 SmallPtrSet<const SDNode *, 32> Visited;
13312 SmallVector<const SDNode *, 8> Worklist;
13313 // Ptr is predecessor to both N and Op.
13314 Visited.insert(Ptr.getNode());
13315 Worklist.push_back(N);
13316 Worklist.push_back(Op);
13317 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
13318 !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
13319 SDValue Result = isLoad
13320 ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13321 BasePtr, Offset, AM)
13322 : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13323 BasePtr, Offset, AM);
13324 ++PostIndexedNodes;
13325 ++NodesCombined;
13326 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
13327 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
13328 dbgs() << '\n');
13329 WorklistRemover DeadNodes(*this);
13330 if (isLoad) {
13331 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13332 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13333 } else {
13334 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13337 // Finally, since the node is now dead, remove it from the graph.
13338 deleteAndRecombine(N);
13340 // Replace the uses of Op with uses of the updated base value.
13341 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
13342 Result.getValue(isLoad ? 1 : 0));
13343 deleteAndRecombine(Op);
13344 return true;
13349 return false;
13352 /// Return the base-pointer arithmetic from an indexed \p LD.
13353 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13354 ISD::MemIndexedMode AM = LD->getAddressingMode();
13355 assert(AM != ISD::UNINDEXED);
13356 SDValue BP = LD->getOperand(1);
13357 SDValue Inc = LD->getOperand(2);
13359 // Some backends use TargetConstants for load offsets, but don't expect
13360 // TargetConstants in general ADD nodes. We can convert these constants into
13361 // regular Constants (if the constant is not opaque).
13362 assert((Inc.getOpcode() != ISD::TargetConstant ||
13363 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13364 "Cannot split out indexing using opaque target constants");
13365 if (Inc.getOpcode() == ISD::TargetConstant) {
13366 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13367 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13368 ConstInc->getValueType(0));
13371 unsigned Opc =
13372 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13373 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13376 static inline int numVectorEltsOrZero(EVT T) {
13377 return T.isVector() ? T.getVectorNumElements() : 0;
13380 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13381 Val = ST->getValue();
13382 EVT STType = Val.getValueType();
13383 EVT STMemType = ST->getMemoryVT();
13384 if (STType == STMemType)
13385 return true;
13386 if (isTypeLegal(STMemType))
13387 return false; // fail.
13388 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13389 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13390 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13391 return true;
13393 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13394 STType.isInteger() && STMemType.isInteger()) {
13395 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13396 return true;
13398 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13399 Val = DAG.getBitcast(STMemType, Val);
13400 return true;
13402 return false; // fail.
13405 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13406 EVT LDMemType = LD->getMemoryVT();
13407 EVT LDType = LD->getValueType(0);
13408 assert(Val.getValueType() == LDMemType &&
13409 "Attempting to extend value of non-matching type");
13410 if (LDType == LDMemType)
13411 return true;
13412 if (LDMemType.isInteger() && LDType.isInteger()) {
13413 switch (LD->getExtensionType()) {
13414 case ISD::NON_EXTLOAD:
13415 Val = DAG.getBitcast(LDType, Val);
13416 return true;
13417 case ISD::EXTLOAD:
13418 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13419 return true;
13420 case ISD::SEXTLOAD:
13421 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13422 return true;
13423 case ISD::ZEXTLOAD:
13424 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13425 return true;
13428 return false;
13431 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13432 if (OptLevel == CodeGenOpt::None || LD->isVolatile())
13433 return SDValue();
13434 SDValue Chain = LD->getOperand(0);
13435 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13436 if (!ST || ST->isVolatile())
13437 return SDValue();
13439 EVT LDType = LD->getValueType(0);
13440 EVT LDMemType = LD->getMemoryVT();
13441 EVT STMemType = ST->getMemoryVT();
13442 EVT STType = ST->getValue().getValueType();
13444 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13445 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13446 int64_t Offset;
13447 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13448 return SDValue();
13450 // Normalize for endianness. After this, Offset=0 will denote that the least
13451 // significant bit in the loaded value maps to the least significant bit in
13452 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
13453 // n-th least significant byte of the stored value.
13454 if (DAG.getDataLayout().isBigEndian())
13455 Offset = (STMemType.getStoreSizeInBits() -
13456 LDMemType.getStoreSizeInBits()) / 8 - Offset;
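// Worked example (a sketch): for a 4-byte store feeding a 1-byte load on a
// big-endian target, an address offset of 3 addresses the least significant
// byte, and it normalizes to Offset = (32 - 8) / 8 - 3 = 0.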
13458 // Check that the stored value covers all bits that are loaded.
13459 bool STCoversLD =
13460 (Offset >= 0) &&
13461 (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13463 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13464 if (LD->isIndexed()) {
13465 bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13466 LD->getAddressingMode() == ISD::POST_DEC);
13467 unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
13468 SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
13469 LD->getOperand(1), LD->getOperand(2));
13470 SDValue Ops[] = {Val, Idx, Chain};
13471 return CombineTo(LD, Ops, 3);
13473 return CombineTo(LD, Val, Chain);
13476 if (!STCoversLD)
13477 return SDValue();
13479 // Memory as copy space (potentially masked).
13480 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13481 // Simple case: Direct non-truncating forwarding
13482 if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13483 return ReplaceLd(LD, ST->getValue(), Chain);
13484 // Can we model the truncate and extension with an and mask?
13485 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13486 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13487 // Mask to size of LDMemType
13488 auto Mask =
13489 DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13490 STMemType.getSizeInBits()),
13491 SDLoc(ST), STType);
13492 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13493 return ReplaceLd(LD, Val, Chain);
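// E.g. (illustrative): an i32 value stored with a truncating store to an
// i16 slot and reloaded by a zero- or any-extending i16 load of i32 type
// is forwarded as (and StoredValue, 0xFFFF), with no memory access.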
13497 // TODO: Deal with nonzero offset.
13498 if (LD->getBasePtr().isUndef() || Offset != 0)
13499 return SDValue();
13500 // Model necessary truncations / extensions.
13501 SDValue Val;
13502 // Truncate Value To Stored Memory Size.
13503 do {
13504 if (!getTruncatedStoreValue(ST, Val))
13505 continue;
13506 if (!isTypeLegal(LDMemType))
13507 continue;
13508 if (STMemType != LDMemType) {
13509 // TODO: Support vectors? This requires extract_subvector/bitcast.
13510 if (!STMemType.isVector() && !LDMemType.isVector() &&
13511 STMemType.isInteger() && LDMemType.isInteger())
13512 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13513 else
13514 continue;
13516 if (!extendLoadedValueToExtension(LD, Val))
13517 continue;
13518 return ReplaceLd(LD, Val, Chain);
13519 } while (false);
13521 // On failure, clean up dead nodes we may have created.
13522 if (Val->use_empty())
13523 deleteAndRecombine(Val.getNode());
13524 return SDValue();
13527 SDValue DAGCombiner::visitLOAD(SDNode *N) {
13528 LoadSDNode *LD = cast<LoadSDNode>(N);
13529 SDValue Chain = LD->getChain();
13530 SDValue Ptr = LD->getBasePtr();
13532 // If load is not volatile and there are no uses of the loaded value (and
13533 // the updated indexed value in case of indexed loads), change uses of the
13534 // chain value into uses of the chain input (i.e. delete the dead load).
13535 if (!LD->isVolatile()) {
13536 if (N->getValueType(1) == MVT::Other) {
13537 // Unindexed loads.
13538 if (!N->hasAnyUseOfValue(0)) {
13539 // It's not safe to use the two value CombineTo variant here. e.g.
13540 // v1, chain2 = load chain1, loc
13541 // v2, chain3 = load chain2, loc
13542 // v3 = add v2, c
13543 // Now we replace use of chain2 with chain1. This makes the second load
13544 // isomorphic to the one we are deleting, and thus makes this load live.
13545 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
13546 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
13547 dbgs() << "\n");
13548 WorklistRemover DeadNodes(*this);
13549 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13550 AddUsersToWorklist(Chain.getNode());
13551 if (N->use_empty())
13552 deleteAndRecombine(N);
13554 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13556 } else {
13557 // Indexed loads.
13558 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
13560 // If this load has an opaque TargetConstant offset, then we cannot split
13561 // the indexing into an add/sub directly (that TargetConstant may not be
13562 // valid for a different type of node, and we cannot convert an opaque
13563 // target constant into a regular constant).
13564 bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
13565 cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
13567 if (!N->hasAnyUseOfValue(0) &&
13568 ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
13569 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
13570 SDValue Index;
13571 if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
13572 Index = SplitIndexingFromLoad(LD);
13573 // Try to fold the base pointer arithmetic into subsequent loads and
13574 // stores.
13575 AddUsersToWorklist(N);
13576 } else
13577 Index = DAG.getUNDEF(N->getValueType(1));
13578 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
13579 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
13580 dbgs() << " and 2 other values\n");
13581 WorklistRemover DeadNodes(*this);
13582 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
13583 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
13584 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
13585 deleteAndRecombine(N);
13586 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13591 // If this load is directly stored, replace the load value with the stored
13592 // value.
13593 if (auto V = ForwardStoreValueToDirectLoad(LD))
13594 return V;
13596 // Try to infer better alignment information than the load already has.
13597 if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
13598 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13599 if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
13600 SDValue NewLoad = DAG.getExtLoad(
13601 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
13602 LD->getPointerInfo(), LD->getMemoryVT(), Align,
13603 LD->getMemOperand()->getFlags(), LD->getAAInfo());
13604 // NewLoad will always be N as we are only refining the alignment
13605 assert(NewLoad.getNode() == N);
13606 (void)NewLoad;
13611 if (LD->isUnindexed()) {
13612 // Walk up chain skipping non-aliasing memory nodes.
13613 SDValue BetterChain = FindBetterChain(LD, Chain);
13615 // If there is a better chain.
13616 if (Chain != BetterChain) {
13617 SDValue ReplLoad;
13619 // Replace the chain to avoid dependency.
13620 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
13621 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
13622 BetterChain, Ptr, LD->getMemOperand());
13623 } else {
13624 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
13625 LD->getValueType(0),
13626 BetterChain, Ptr, LD->getMemoryVT(),
13627 LD->getMemOperand());
13630 // Create token factor to keep old chain connected.
13631 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
13632 MVT::Other, Chain, ReplLoad.getValue(1));
13634 // Replace uses with load result and token factor
13635 return CombineTo(N, ReplLoad.getValue(0), Token);
13639 // Try transforming N to an indexed load.
13640 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13641 return SDValue(N, 0);
13643 // Try to slice up N to more direct loads if the slices are mapped to
13644 // different register banks or pairing can take place.
13645 if (SliceUpLoad(N))
13646 return SDValue(N, 0);
13648 return SDValue();
13651 namespace {
13653 /// Helper structure used to slice a load in smaller loads.
13654 /// Basically a slice is obtained from the following sequence:
13655 /// Origin = load Ty1, Base
13656 /// Shift = srl Ty1 Origin, CstTy Amount
13657 /// Inst = trunc Shift to Ty2
13659 /// Then, it will be rewritten into:
13660 /// Slice = load SliceTy, Base + SliceOffset
13661 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
13663 /// SliceTy is deduced from the number of bits that are actually used to
13664 /// build Inst.
13665 struct LoadedSlice {
13666 /// Helper structure used to compute the cost of a slice.
13667 struct Cost {
13668 /// Are we optimizing for code size.
13669 bool ForCodeSize;
13671 /// Various costs.
13672 unsigned Loads = 0;
13673 unsigned Truncates = 0;
13674 unsigned CrossRegisterBanksCopies = 0;
13675 unsigned ZExts = 0;
13676 unsigned Shift = 0;
13678 Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13680 /// Get the cost of one isolated slice.
13681 Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13682 : ForCodeSize(ForCodeSize), Loads(1) {
13683 EVT TruncType = LS.Inst->getValueType(0);
13684 EVT LoadedType = LS.getLoadedType();
13685 if (TruncType != LoadedType &&
13686 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13687 ZExts = 1;
13690 /// Account for slicing gain in the current cost.
13691 /// Slicing provides a few gains, like removing a shift or a
13692 /// truncate. This method allows growing the cost of the original
13693 /// load by the gain from this slice.
13694 void addSliceGain(const LoadedSlice &LS) {
13695 // Each slice saves a truncate.
13696 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13697 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13698 LS.Inst->getValueType(0)))
13699 ++Truncates;
13700 // If there is a shift amount, this slice gets rid of it.
13701 if (LS.Shift)
13702 ++Shift;
13703 // If this slice can merge a cross register bank copy, account for it.
13704 if (LS.canMergeExpensiveCrossRegisterBankCopy())
13705 ++CrossRegisterBanksCopies;
13708 Cost &operator+=(const Cost &RHS) {
13709 Loads += RHS.Loads;
13710 Truncates += RHS.Truncates;
13711 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13712 ZExts += RHS.ZExts;
13713 Shift += RHS.Shift;
13714 return *this;
13717 bool operator==(const Cost &RHS) const {
13718 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13719 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13720 ZExts == RHS.ZExts && Shift == RHS.Shift;
13723 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13725 bool operator<(const Cost &RHS) const {
13726 // Assume cross register banks copies are as expensive as loads.
13727 // FIXME: Do we want some more target hooks?
13728 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13729 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13730 // Unless we are optimizing for code size, consider the
13731 // expensive operation first.
13732 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13733 return ExpensiveOpsLHS < ExpensiveOpsRHS;
13734 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13735 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13738 bool operator>(const Cost &RHS) const { return RHS < *this; }
13740 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13742 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13745 // The last instruction that represents the slice. This should be a
13746 // truncate instruction.
13747 SDNode *Inst;
13749 // The original load instruction.
13750 LoadSDNode *Origin;
13752 // The right shift amount in bits from the original load.
13753 unsigned Shift;
13755 // The DAG from which Origin comes.
13756 // This is used to get some contextual information about legal types, etc.
13757 SelectionDAG *DAG;
13759 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
13760 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
13761 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13763 /// Get the bits used in a chunk of bits \p BitWidth large.
13764 /// \return Result is \p BitWidth bits wide, with used bits set to 1
13765 /// and unused bits set to 0.
13766 APInt getUsedBits() const {
13767 // Reproduce the trunc(lshr) sequence:
13768 // - Start from the truncated value.
13769 // - Zero extend to the desired bit width.
13770 // - Shift left.
13771 assert(Origin && "No original load to compare against.");
13772 unsigned BitWidth = Origin->getValueSizeInBits(0);
13773 assert(Inst && "This slice is not bound to an instruction");
13774 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
13775 "Extracted slice is bigger than the whole type!");
13776 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
13777 UsedBits.setAllBits();
13778 UsedBits = UsedBits.zext(BitWidth);
13779 UsedBits <<= Shift;
13780 return UsedBits;
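// For example (illustrative values): an i8 slice taken at Shift = 8 from
// an i32 load yields UsedBits = 0x0000FF00.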
13783 /// Get the size of the slice to be loaded in bytes.
13784 unsigned getLoadedSize() const {
13785 unsigned SliceSize = getUsedBits().countPopulation();
13786 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13787 return SliceSize / 8;
13790 /// Get the type that will be loaded for this slice.
13791 /// Note: This may not be the final type for the slice.
13792 EVT getLoadedType() const {
13793 assert(DAG && "Missing context");
13794 LLVMContext &Ctxt = *DAG->getContext();
13795 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13798 /// Get the alignment of the load used for this slice.
13799 unsigned getAlignment() const {
13800 unsigned Alignment = Origin->getAlignment();
13801 unsigned Offset = getOffsetFromBase();
13802 if (Offset != 0)
13803 Alignment = MinAlign(Alignment, Alignment + Offset);
13804 return Alignment;
13807 /// Check if this slice can be rewritten with legal operations.
13808 bool isLegal() const {
13809 // An invalid slice is not legal.
13810 if (!Origin || !Inst || !DAG)
13811 return false;
13813 // Offsets apply only to indexed loads; we do not handle those.
13814 if (!Origin->getOffset().isUndef())
13815 return false;
13817 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13819 // Check that the type is legal.
13820 EVT SliceType = getLoadedType();
13821 if (!TLI.isTypeLegal(SliceType))
13822 return false;
13824 // Check that the load is legal for this type.
13825 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
13826 return false;
13828 // Check that the offset can be computed.
13829 // 1. Check its type.
13830 EVT PtrType = Origin->getBasePtr().getValueType();
13831 if (PtrType == MVT::Untyped || PtrType.isExtended())
13832 return false;
13834 // 2. Check that it fits in the immediate.
13835 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
13836 return false;
13838 // 3. Check that the computation is legal.
13839 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
13840 return false;
13842 // Check that the zext is legal if it needs one.
13843 EVT TruncateType = Inst->getValueType(0);
13844 if (TruncateType != SliceType &&
13845 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
13846 return false;
13848 return true;
13851 /// Get the offset in bytes of this slice in the original chunk of
13852 /// bits.
13853 /// \pre DAG != nullptr.
13854 uint64_t getOffsetFromBase() const {
13855 assert(DAG && "Missing context.");
13856 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
13857 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
13858 uint64_t Offset = Shift / 8;
13859 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
13860 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
13861 "The size of the original loaded type is not a multiple of a"
13862 " byte.");
13863 // If Offset is bigger than TySizeInBytes, it means we are loading all
13864 // zeros. This should have been optimized away earlier in the process.
13865 assert(TySizeInBytes > Offset &&
13866 "Invalid shift amount for given loaded size");
13867 if (IsBigEndian)
13868 Offset = TySizeInBytes - Offset - getLoadedSize();
13869 return Offset;
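// For example (illustrative values): a 2-byte slice at Shift = 16 of a
// 4-byte load sits at byte offset 2 on little-endian targets, but at
// byte offset 0 on big-endian targets.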
13872 /// Generate the sequence of instructions to load the slice
13873 /// represented by this object and redirect the uses of this slice to
13874 /// this new sequence of instructions.
13875 /// \pre this->Inst && this->Origin are valid Instructions and this
13876 /// object passed the legal check: LoadedSlice::isLegal returned true.
13877 /// \return The last instruction of the sequence used to load the slice.
13878 SDValue loadSlice() const {
13879 assert(Inst && Origin && "Unable to replace a non-existing slice.");
13880 const SDValue &OldBaseAddr = Origin->getBasePtr();
13881 SDValue BaseAddr = OldBaseAddr;
13882 // Get the offset in that chunk of bytes w.r.t. the endianness.
13883 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
13884 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
13885 if (Offset) {
13886 // BaseAddr = BaseAddr + Offset.
13887 EVT ArithType = BaseAddr.getValueType();
13888 SDLoc DL(Origin);
13889 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
13890 DAG->getConstant(Offset, DL, ArithType));
13893 // Create the type of the loaded slice according to its size.
13894 EVT SliceType = getLoadedType();
13896 // Create the load for the slice.
13897 SDValue LastInst =
13898 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
13899 Origin->getPointerInfo().getWithOffset(Offset),
13900 getAlignment(), Origin->getMemOperand()->getFlags());
13901 // If the final type is not the same as the loaded type, this means that
13902 // we have to pad with zero. Create a zero extend for that.
13903 EVT FinalType = Inst->getValueType(0);
13904 if (SliceType != FinalType)
13905 LastInst =
13906 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
13907 return LastInst;
13910 /// Check if this slice can be merged with an expensive cross register
13911 /// bank copy. E.g.,
13912 /// i = load i32
13913 /// f = bitcast i32 i to float
13914 bool canMergeExpensiveCrossRegisterBankCopy() const {
13915 if (!Inst || !Inst->hasOneUse())
13916 return false;
13917 SDNode *Use = *Inst->use_begin();
13918 if (Use->getOpcode() != ISD::BITCAST)
13919 return false;
13920 assert(DAG && "Missing context");
13921 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13922 EVT ResVT = Use->getValueType(0);
13923 const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
13924 const TargetRegisterClass *ArgRC =
13925 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
13926 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
13927 return false;
13929 // At this point, we know that we perform a cross-register-bank copy.
13930 // Check if it is expensive.
13931 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
13932 // Assume bitcasts are cheap, unless both register classes do not
13933 // explicitly share a common sub class.
13934 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13935 return false;
13937 // Check if it will be merged with the load.
13938 // 1. Check the alignment constraint.
13939 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13940 ResVT.getTypeForEVT(*DAG->getContext()));
13942 if (RequiredAlignment > getAlignment())
13943 return false;
13945 // 2. Check that the load is a legal operation for that type.
13946 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13947 return false;
13949 // 3. Check that we do not have a zext in the way.
13950 if (Inst->getValueType(0) != getLoadedType())
13951 return false;
13953 return true;
13957 } // end anonymous namespace
13959 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13960 /// \p UsedBits looks like 0..0 1..1 0..0.
13961 static bool areUsedBitsDense(const APInt &UsedBits) {
13962 // If all the bits are one, this is dense!
13963 if (UsedBits.isAllOnesValue())
13964 return true;
13966 // Get rid of the unused bits on the right.
13967 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13968 // Get rid of the unused bits on the left.
13969 if (NarrowedUsedBits.countLeadingZeros())
13970 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13971 // Check that the chunk of bits is completely used.
13972 return NarrowedUsedBits.isAllOnesValue();
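// For example (illustrative values): 0b00111100 is dense, while
// 0b00100100 is not, since its used bits are separated by an unused
// hole.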
13975 /// Check whether or not \p First and \p Second are next to each other
13976 /// in memory. This means that there is no hole between the bits loaded
13977 /// by \p First and the bits loaded by \p Second.
13978 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13979 const LoadedSlice &Second) {
13980 assert(First.Origin == Second.Origin && First.Origin &&
13981 "Unable to match different memory origins.");
13982 APInt UsedBits = First.getUsedBits();
13983 assert((UsedBits & Second.getUsedBits()) == 0 &&
13984 "Slices are not supposed to overlap.");
13985 UsedBits |= Second.getUsedBits();
13986 return areUsedBitsDense(UsedBits);
13989 /// Adjust the \p GlobalLSCost according to the target
13990 /// pairing capabilities and the layout of the slices.
13991 /// \pre \p GlobalLSCost should account for at least as many loads as
13992 /// there are slices in \p LoadedSlices.
13993 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13994 LoadedSlice::Cost &GlobalLSCost) {
13995 unsigned NumberOfSlices = LoadedSlices.size();
13996 // If there are fewer than 2 elements, no pairing is possible.
13997 if (NumberOfSlices < 2)
13998 return;
14000 // Sort the slices so that elements that are likely to be next to each
14001 // other in memory are next to each other in the list.
14002 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14003 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14004 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14006 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
14007 // First (resp. Second) is the first (resp. second) potential candidate
14008 // to be placed in a paired load.
14009 const LoadedSlice *First = nullptr;
14010 const LoadedSlice *Second = nullptr;
14011 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14012 // Set the beginning of the pair.
14013 First = Second) {
14014 Second = &LoadedSlices[CurrSlice];
14016 // If First is NULL, it means we start a new pair.
14017 // Get to the next slice.
14018 if (!First)
14019 continue;
14021 EVT LoadedType = First->getLoadedType();
14023 // If the types of the slices are different, we cannot pair them.
14024 if (LoadedType != Second->getLoadedType())
14025 continue;
14027 // Check if the target supplies paired loads for this type.
14028 unsigned RequiredAlignment = 0;
14029 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
14030 // Move to the next pair; this type is hopeless.
14031 Second = nullptr;
14032 continue;
14034 // Check if we meet the alignment requirement.
14035 if (RequiredAlignment > First->getAlignment())
14036 continue;
14038 // Check that both loads are next to each other in memory.
14039 if (!areSlicesNextToEachOther(*First, *Second))
14040 continue;
14042 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14043 --GlobalLSCost.Loads;
14044 // Move to the next pair.
14045 Second = nullptr;
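// For example (hypothetical target): two adjacent i16 slices on a target
// that provides a paired 2 x i16 load are charged as a single load in
// GlobalLSCost.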
14049 /// Check the profitability of all involved LoadedSlice.
14050 /// Currently, it is considered profitable if there are exactly two
14051 /// involved slices (1) which are (2) next to each other in memory, and
14052 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14054 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
14055 /// the elements themselves.
14057 /// FIXME: When the cost model will be mature enough, we can relax
14058 /// constraints (1) and (2).
14059 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14060 const APInt &UsedBits, bool ForCodeSize) {
14061 unsigned NumberOfSlices = LoadedSlices.size();
14062 if (StressLoadSlicing)
14063 return NumberOfSlices > 1;
14065 // Check (1).
14066 if (NumberOfSlices != 2)
14067 return false;
14069 // Check (2).
14070 if (!areUsedBitsDense(UsedBits))
14071 return false;
14073 // Check (3).
14074 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14075 // The original code has one big load.
14076 OrigCost.Loads = 1;
14077 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14078 const LoadedSlice &LS = LoadedSlices[CurrSlice];
14079 // Accumulate the cost of all the slices.
14080 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14081 GlobalSlicingCost += SliceCost;
14083 // Account as cost in the original configuration the gain obtained
14084 // with the current slices.
14085 OrigCost.addSliceGain(LS);
14088 // If the target supports paired load, adjust the cost accordingly.
14089 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14090 return OrigCost > GlobalSlicingCost;
14093 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
14094 /// operations, split it into the various pieces being extracted.
14096 /// This sort of thing is introduced by SROA.
14097 /// This slicing takes care not to insert overlapping loads.
14098 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
14099 bool DAGCombiner::SliceUpLoad(SDNode *N) {
14100 if (Level < AfterLegalizeDAG)
14101 return false;
14103 LoadSDNode *LD = cast<LoadSDNode>(N);
14104 if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
14105 !LD->getValueType(0).isInteger())
14106 return false;
14108 // Keep track of already used bits to detect overlapping values.
14109 // In that case, we will just abort the transformation.
14110 APInt UsedBits(LD->getValueSizeInBits(0), 0);
14112 SmallVector<LoadedSlice, 4> LoadedSlices;
14114 // Check if this load is used as several smaller chunks of bits.
14115 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
14116 // of computation for each trunc.
14117 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
14118 UI != UIEnd; ++UI) {
14119 // Skip the uses of the chain.
14120 if (UI.getUse().getResNo() != 0)
14121 continue;
14123 SDNode *User = *UI;
14124 unsigned Shift = 0;
14126 // Check if this is a trunc(lshr).
14127 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
14128 isa<ConstantSDNode>(User->getOperand(1))) {
14129 Shift = User->getConstantOperandVal(1);
14130 User = *User->use_begin();
14133 // At this point, User is a TRUNCATE iff we encountered trunc or
14134 // trunc(lshr).
14135 if (User->getOpcode() != ISD::TRUNCATE)
14136 return false;
14138 // The width of the type must be a power of 2 and at least 8 bits.
14139 // Otherwise the load cannot be represented in LLVM IR.
14140 // Moreover, if the shift amount is not a multiple of 8 bits, the slice
14141 // would not start on a byte boundary. We do not support that.
14142 unsigned Width = User->getValueSizeInBits(0);
14143 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
14144 return false;
14146 // Build the slice for this chain of computations.
14147 LoadedSlice LS(User, LD, Shift, &DAG);
14148 APInt CurrentUsedBits = LS.getUsedBits();
14150 // Check if this slice overlaps with another.
14151 if ((CurrentUsedBits & UsedBits) != 0)
14152 return false;
14153 // Update the bits used globally.
14154 UsedBits |= CurrentUsedBits;
14156 // Check if the new slice would be legal.
14157 if (!LS.isLegal())
14158 return false;
14160 // Record the slice.
14161 LoadedSlices.push_back(LS);
14164 // Abort slicing if it does not seem to be profitable.
14165 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
14166 return false;
14168 ++SlicedLoads;
14170 // Rewrite each chain to use an independent load.
14171 // By construction, each chain can be represented by a unique load.
14173 // Prepare the argument for the new token factor for all the slices.
14174 SmallVector<SDValue, 8> ArgChains;
14175 for (SmallVectorImpl<LoadedSlice>::const_iterator
14176 LSIt = LoadedSlices.begin(),
14177 LSItEnd = LoadedSlices.end();
14178 LSIt != LSItEnd; ++LSIt) {
14179 SDValue SliceInst = LSIt->loadSlice();
14180 CombineTo(LSIt->Inst, SliceInst, true);
14181 if (SliceInst.getOpcode() != ISD::LOAD)
14182 SliceInst = SliceInst.getOperand(0);
14183 assert(SliceInst->getOpcode() == ISD::LOAD &&
14184 "It takes more than a zext to get to the loaded slice!!");
14185 ArgChains.push_back(SliceInst.getValue(1));
14188 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
14189 ArgChains);
14190 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14191 AddToWorklist(Chain.getNode());
14192 return true;
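// Illustrative end-to-end rewrite (types assumed):
//   %w  = load i32, p
//   %lo = trunc %w to i16
//   %hi = trunc (srl %w, 16) to i16
// becomes, on a little-endian target,
//   %lo = load i16, p
//   %hi = load i16, p+2
// with a TokenFactor joining the two new load chains.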
14195 /// Check to see if V is (and load (ptr), imm), where the load is having
14196 /// specific bytes cleared out. If so, return the byte size being masked out
14197 /// and the shift amount.
14198 static std::pair<unsigned, unsigned>
14199 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
14200 std::pair<unsigned, unsigned> Result(0, 0);
14202 // Check for the structure we're looking for.
14203 if (V->getOpcode() != ISD::AND ||
14204 !isa<ConstantSDNode>(V->getOperand(1)) ||
14205 !ISD::isNormalLoad(V->getOperand(0).getNode()))
14206 return Result;
14208 // Check the chain and pointer.
14209 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
14210 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
14212 // This only handles simple types.
14213 if (V.getValueType() != MVT::i16 &&
14214 V.getValueType() != MVT::i32 &&
14215 V.getValueType() != MVT::i64)
14216 return Result;
14218 // Check the constant mask. Invert it so that the bits being masked out are
14219 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
14220 // follow the sign bit for uniformity.
14221 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
14222 unsigned NotMaskLZ = countLeadingZeros(NotMask);
14223 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
14224 unsigned NotMaskTZ = countTrailingZeros(NotMask);
14225 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
14226 if (NotMaskLZ == 64) return Result; // All zero mask.
14228 // See if we have a contiguous run of bits, i.e. the mask has the form 0*1+0*.
14229 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
14230 return Result;
14232 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
14233 if (V.getValueType() != MVT::i64 && NotMaskLZ)
14234 NotMaskLZ -= 64-V.getValueSizeInBits();
14236 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14237 switch (MaskedBytes) {
14238 case 1:
14239 case 2:
14240 case 4: break;
14241 default: return Result; // All one mask, or 5-byte mask.
14244 // Verify that the first bit starts at a multiple of mask so that the access
14245 // is aligned the same as the access width.
14246 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14248 // For narrowing to be valid, the load must be the memory operation
14249 // immediately preceding the store.
14250 if (LD == Chain.getNode())
14251 ; // ok.
14252 else if (Chain->getOpcode() == ISD::TokenFactor &&
14253 SDValue(LD, 1).hasOneUse()) {
14254 // LD has only 1 chain use, so there are no indirect dependencies.
14255 bool isOk = false;
14256 for (const SDValue &ChainOp : Chain->op_values())
14257 if (ChainOp.getNode() == LD) {
14258 isOk = true;
14259 break;
14261 if (!isOk)
14262 return Result;
14263 } else
14264 return Result; // Fail.
14266 Result.first = MaskedBytes;
14267 Result.second = NotMaskTZ/8;
14268 return Result;
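// Worked example (illustrative values): for
//   V = (and (load i32 p), 0xFFFF00FF)
// we get NotMask = 0x0000FF00, so exactly MaskedBytes = 1 byte is masked
// out at ByteShift = 1 byte from the bottom, and the result is {1, 1}.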
14271 /// Check to see if IVal is something that provides a value as specified by
14272 /// MaskInfo. If so, replace the specified store with a narrower store of
14273 /// truncated IVal.
14274 static SDNode *
14275 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14276 SDValue IVal, StoreSDNode *St,
14277 DAGCombiner *DC) {
14278 unsigned NumBytes = MaskInfo.first;
14279 unsigned ByteShift = MaskInfo.second;
14280 SelectionDAG &DAG = DC->getDAG();
14282 // Check to see if IVal is all zeros in the part being masked in by the 'or'
14283 // that uses this. If not, this is not a replacement.
14284 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14285 ByteShift*8, (ByteShift+NumBytes)*8);
14286 if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
14288 // Check that it is legal on the target to do this. It is legal if the new
14289 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14290 // legalization.
14291 MVT VT = MVT::getIntegerVT(NumBytes*8);
14292 if (!DC->isTypeLegal(VT))
14293 return nullptr;
14295 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
14296 // shifted by ByteShift and truncated down to NumBytes.
14297 if (ByteShift) {
14298 SDLoc DL(IVal);
14299 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14300 DAG.getConstant(ByteShift*8, DL,
14301 DC->getShiftAmountTy(IVal.getValueType())));
14304 // Figure out the offset for the store and the alignment of the access.
14305 unsigned StOffset;
14306 unsigned NewAlign = St->getAlignment();
14308 if (DAG.getDataLayout().isLittleEndian())
14309 StOffset = ByteShift;
14310 else
14311 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14313 SDValue Ptr = St->getBasePtr();
14314 if (StOffset) {
14315 SDLoc DL(IVal);
14316 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14317 Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14318 NewAlign = MinAlign(NewAlign, StOffset);
14321 // Truncate down to the new size.
14322 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14324 ++OpsNarrowed;
14325 return DAG
14326 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14327 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
14328 .getNode();
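// Hedged sketch of the rewrite this enables (little-endian, values
// assumed): with MaskInfo = {1, 1} as in the example above,
//   store (or (and (load i32 p), 0xFFFF00FF), (shl (and %y, 0xFF), 8)), p
// becomes a one-byte store of (trunc %y to i8) at p+1, leaving the wide
// load and the 'or' dead.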
14331 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14332 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14333 /// narrowing the load and store if it would end up being a win for performance
14334 /// or code size.
14335 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14336 StoreSDNode *ST = cast<StoreSDNode>(N);
14337 if (ST->isVolatile())
14338 return SDValue();
14340 SDValue Chain = ST->getChain();
14341 SDValue Value = ST->getValue();
14342 SDValue Ptr = ST->getBasePtr();
14343 EVT VT = Value.getValueType();
14345 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14346 return SDValue();
14348 unsigned Opc = Value.getOpcode();
14350 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14351 // is a byte mask indicating a consecutive number of bytes, check to see if
14352 // Y is known to provide just those bytes. If so, we try to replace the
14353 // load + or + store sequence with a single (narrower) store, which makes
14354 // the load dead.
14355 if (Opc == ISD::OR) {
14356 std::pair<unsigned, unsigned> MaskedLoad;
14357 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14358 if (MaskedLoad.first)
14359 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14360 Value.getOperand(1), ST,this))
14361 return SDValue(NewST, 0);
14363 // Or is commutative, so try swapping X and Y.
14364 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14365 if (MaskedLoad.first)
14366 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14367 Value.getOperand(0), ST,this))
14368 return SDValue(NewST, 0);
14371 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14372 Value.getOperand(1).getOpcode() != ISD::Constant)
14373 return SDValue();
14375 SDValue N0 = Value.getOperand(0);
14376 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14377 Chain == SDValue(N0.getNode(), 1)) {
14378 LoadSDNode *LD = cast<LoadSDNode>(N0);
14379 if (LD->getBasePtr() != Ptr ||
14380 LD->getPointerInfo().getAddrSpace() !=
14381 ST->getPointerInfo().getAddrSpace())
14382 return SDValue();
14384 // Find the type to which to narrow the load / op / store.
14385 SDValue N1 = Value.getOperand(1);
14386 unsigned BitWidth = N1.getValueSizeInBits();
14387 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14388 if (Opc == ISD::AND)
14389 Imm ^= APInt::getAllOnesValue(BitWidth);
14390 if (Imm == 0 || Imm.isAllOnesValue())
14391 return SDValue();
14392 unsigned ShAmt = Imm.countTrailingZeros();
14393 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14394 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14395 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14396 // The narrowing should be profitable, the load/store operation should be
14397 // legal (or custom) and the store size should be equal to the NewVT width.
14398 while (NewBW < BitWidth &&
14399 (NewVT.getStoreSizeInBits() != NewBW ||
14400 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14401 !TLI.isNarrowingProfitable(VT, NewVT))) {
14402 NewBW = NextPowerOf2(NewBW);
14403 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14405 if (NewBW >= BitWidth)
14406 return SDValue();
14408 // If the lowest changed bit does not start on a NewBW-sized boundary,
14409 // start at the previous one.
14410 if (ShAmt % NewBW)
14411 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14412 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14413 std::min(BitWidth, ShAmt + NewBW));
14414 if ((Imm & Mask) == Imm) {
14415 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14416 if (Opc == ISD::AND)
14417 NewImm ^= APInt::getAllOnesValue(NewBW);
14418 uint64_t PtrOff = ShAmt / 8;
14419 // For big endian targets, we need to adjust the offset to the pointer to
14420 // load the correct bytes.
14421 if (DAG.getDataLayout().isBigEndian())
14422 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14424 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14425 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14426 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14427 return SDValue();
14429 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14430 Ptr.getValueType(), Ptr,
14431 DAG.getConstant(PtrOff, SDLoc(LD),
14432 Ptr.getValueType()));
14433 SDValue NewLD =
14434 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14435 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14436 LD->getMemOperand()->getFlags(), LD->getAAInfo());
14437 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14438 DAG.getConstant(NewImm, SDLoc(Value),
14439 NewVT));
14440 SDValue NewST =
14441 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14442 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14444 AddToWorklist(NewPtr.getNode());
14445 AddToWorklist(NewLD.getNode());
14446 AddToWorklist(NewVal.getNode());
14447 WorklistRemover DeadNodes(*this);
14448 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14449 ++OpsNarrowed;
14450 return NewST;
14454 return SDValue();
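// Worked example for the narrowing above (illustrative, little-endian):
//   store (xor (load i32 p), 0x00FF0000), p
// has ShAmt = 16 and MSB = 23, so NewBW = 8 and the sequence becomes
//   store (xor (load i8 p+2), 0xFF), p+2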
14457 /// For a given floating point load / store pair, if the load value isn't used
14458 /// by any other operations, then consider transforming the pair to integer
14459 /// load / store operations if the target deems the transformation profitable.
14460 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14461 StoreSDNode *ST = cast<StoreSDNode>(N);
14462 SDValue Chain = ST->getChain();
14463 SDValue Value = ST->getValue();
14464 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14465 Value.hasOneUse() &&
14466 Chain == SDValue(Value.getNode(), 1)) {
14467 LoadSDNode *LD = cast<LoadSDNode>(Value);
14468 EVT VT = LD->getMemoryVT();
14469 if (!VT.isFloatingPoint() ||
14470 VT != ST->getMemoryVT() ||
14471 LD->isNonTemporal() ||
14472 ST->isNonTemporal() ||
14473 LD->getPointerInfo().getAddrSpace() != 0 ||
14474 ST->getPointerInfo().getAddrSpace() != 0)
14475 return SDValue();
14477 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14478 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14479 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14480 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14481 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14482 return SDValue();
14484 unsigned LDAlign = LD->getAlignment();
14485 unsigned STAlign = ST->getAlignment();
14486 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14487 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14488 if (LDAlign < ABIAlign || STAlign < ABIAlign)
14489 return SDValue();
14491 SDValue NewLD =
14492 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14493 LD->getPointerInfo(), LDAlign);
14495 SDValue NewST =
14496 DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
14497 ST->getPointerInfo(), STAlign);
14499 AddToWorklist(NewLD.getNode());
14500 AddToWorklist(NewST.getNode());
14501 WorklistRemover DeadNodes(*this);
14502 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14503 ++LdStFP2Int;
14504 return NewST;
14507 return SDValue();
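// Sketch of the transformation (profitability is target-dependent):
//   %f = load float, p
//   store float %f, q
// becomes
//   %i = load i32, p
//   store i32 %i, q
// when the integer load/store is legal, the target deems it desirable,
// and both accesses meet the i32 ABI alignment.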
14510 // This is a helper function for visitMUL to check the profitability
14511 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14512 // MulNode is the original multiply, AddNode is (add x, c1),
14513 // and ConstNode is c2.
14515 // If the (add x, c1) has multiple uses, we could increase
14516 // the number of adds if we make this transformation.
14517 // It would only be worth doing this if we can remove a
14518 // multiply in the process. Check for that here.
14519 // To illustrate:
14520 // (A + c1) * c3
14521 // (A + c2) * c3
14522 // We're checking for cases where we have common "c3 * A" expressions.
14523 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14524 SDValue &AddNode,
14525 SDValue &ConstNode) {
14526 APInt Val;
14528 // If the add only has one use, this would be OK to do.
14529 if (AddNode.getNode()->hasOneUse())
14530 return true;
14532 // Walk all the users of the constant with which we're multiplying.
14533 for (SDNode *Use : ConstNode->uses()) {
14534 if (Use == MulNode) // This use is the one we're on right now. Skip it.
14535 continue;
14537 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
14538 SDNode *OtherOp;
14539 SDNode *MulVar = AddNode.getOperand(0).getNode();
14541 // OtherOp is what we're multiplying against the constant.
14542 if (Use->getOperand(0) == ConstNode)
14543 OtherOp = Use->getOperand(1).getNode();
14544 else
14545 OtherOp = Use->getOperand(0).getNode();
14547 // Check to see if multiply is with the same operand of our "add".
14549 // ConstNode = CONST
14550 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
14551 // ...
14552 // AddNode = (A + c1) <-- MulVar is A.
14553 // = AddNode * ConstNode <-- current visiting instruction.
14555 // If we make this transformation, we will have a common
14556 // multiply (ConstNode * A) that we can save.
14557 if (OtherOp == MulVar)
14558 return true;
14560 // Now check to see if a future expansion will give us a common
14561 // multiply.
14563 // ConstNode = CONST
14564 // AddNode = (A + c1)
14565 // ... = AddNode * ConstNode <-- current visiting instruction.
14566 // ...
14567 // OtherOp = (A + c2)
14568 // Use = OtherOp * ConstNode <-- visiting Use.
14570 // If we make this transformation, we will have a common
14571 // multiply (CONST * A) after we also do the same transformation
14572 // to the "Use" instruction.
14573 if (OtherOp->getOpcode() == ISD::ADD &&
14574 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
14575 OtherOp->getOperand(0).getNode() == MulVar)
14576 return true;
14580 // Didn't find a case where this would be profitable.
14581 return false;
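// Concrete instance (constants assumed): rewriting both
//   (mul (add %A, 10), 5) and (mul (add %A, 20), 5)
// into (add (mul %A, 5), 50) and (add (mul %A, 5), 100) exposes a
// common (mul %A, 5) that can be shared, so the fold is profitable.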
14584 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
14585 unsigned NumStores) {
14586 SmallVector<SDValue, 8> Chains;
14587 SmallPtrSet<const SDNode *, 8> Visited;
14588 SDLoc StoreDL(StoreNodes[0].MemNode);
14590 for (unsigned i = 0; i < NumStores; ++i) {
14591 Visited.insert(StoreNodes[i].MemNode);
14594 // Don't include chains that are other candidate stores or duplicates.
14595 for (unsigned i = 0; i < NumStores; ++i) {
14596 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
14597 Chains.push_back(StoreNodes[i].MemNode->getChain());
14600 assert(Chains.size() > 0 && "Chain should have generated a chain");
14601 return DAG.getTokenFactor(StoreDL, Chains);
14604 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
14605 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
14606 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
14607 // Make sure we have something to merge.
14608 if (NumStores < 2)
14609 return false;
14611 // The latest Node in the DAG.
14612 SDLoc DL(StoreNodes[0].MemNode);
14614 int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
14615 unsigned SizeInBits = NumStores * ElementSizeBits;
14616 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14618 EVT StoreTy;
14619 if (UseVector) {
14620 unsigned Elts = NumStores * NumMemElts;
14621 // Get the type for the merged vector store.
14622 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14623 } else
14624 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
14626 SDValue StoredVal;
14627 if (UseVector) {
14628 if (IsConstantSrc) {
14629 SmallVector<SDValue, 8> BuildVector;
14630 for (unsigned I = 0; I != NumStores; ++I) {
14631 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
14632 SDValue Val = St->getValue();
14633 // If constant is of the wrong type, convert it now.
14634 if (MemVT != Val.getValueType()) {
14635 Val = peekThroughBitcasts(Val);
14636 // Deal with constants of wrong size.
14637 if (ElementSizeBits != Val.getValueSizeInBits()) {
14638 EVT IntMemVT =
14639 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
14640 if (isa<ConstantFPSDNode>(Val)) {
14641 // Not clear how to truncate FP values.
14642 return false;
14643 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
14644 Val = DAG.getConstant(C->getAPIntValue()
14645 .zextOrTrunc(Val.getValueSizeInBits())
14646 .zextOrTrunc(ElementSizeBits),
14647 SDLoc(C), IntMemVT);
14649 // Make sure the correctly sized value is bitcast to the memory type.
14650 Val = DAG.getBitcast(MemVT, Val);
14652 BuildVector.push_back(Val);
14654 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14655 : ISD::BUILD_VECTOR,
14656 DL, StoreTy, BuildVector);
14657 } else {
14658 SmallVector<SDValue, 8> Ops;
14659 for (unsigned i = 0; i < NumStores; ++i) {
14660 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14661 SDValue Val = peekThroughBitcasts(St->getValue());
14662 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
14663 // type MemVT. If the underlying value is not the correct
14664 // type, but it is an extraction of an appropriate vector we
14665 // can recast Val to be of the correct type. This may require
14666 // converting between EXTRACT_VECTOR_ELT and
14667 // EXTRACT_SUBVECTOR.
14668 if ((MemVT != Val.getValueType()) &&
14669 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14670 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
14671 EVT MemVTScalarTy = MemVT.getScalarType();
14672 // We may need to add a bitcast here to get types to line up.
14673 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
14674 Val = DAG.getBitcast(MemVT, Val);
14675 } else {
14676 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
14677 : ISD::EXTRACT_VECTOR_ELT;
14678 SDValue Vec = Val.getOperand(0);
14679 SDValue Idx = Val.getOperand(1);
14680 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
14683 Ops.push_back(Val);
14686 // Build the extracted vector elements back into a vector.
14687 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14688 : ISD::BUILD_VECTOR,
14689 DL, StoreTy, Ops);
14691 } else {
14692 // We should always use a vector store when merging extracted vector
14693 // elements, so this path implies a store of constants.
14694 assert(IsConstantSrc && "Merged vector elements should use vector store");
14696 APInt StoreInt(SizeInBits, 0);
14698 // Construct a single integer constant which is made of the smaller
14699 // constant inputs.
14700 bool IsLE = DAG.getDataLayout().isLittleEndian();
14701 for (unsigned i = 0; i < NumStores; ++i) {
14702 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
14703 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
14705 SDValue Val = St->getValue();
14706 Val = peekThroughBitcasts(Val);
14707 StoreInt <<= ElementSizeBits;
14708 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
14709 StoreInt |= C->getAPIntValue()
14710 .zextOrTrunc(ElementSizeBits)
14711 .zextOrTrunc(SizeInBits);
14712 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
14713 StoreInt |= C->getValueAPF()
14714 .bitcastToAPInt()
14715 .zextOrTrunc(ElementSizeBits)
14716 .zextOrTrunc(SizeInBits);
14717 // If fp truncation is necessary give up for now.
14718 if (MemVT.getSizeInBits() != ElementSizeBits)
14719 return false;
14720 } else {
14721 llvm_unreachable("Invalid constant element type");
14725 // Create the new Load and Store operations.
14726 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
14729 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14730 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
14732 // Make sure we use a trunc store if it's necessary to be legal.
14733 SDValue NewStore;
14734 if (!UseTrunc) {
14735 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
14736 FirstInChain->getPointerInfo(),
14737 FirstInChain->getAlignment());
14738 } else { // Must be realized as a trunc store
14739 EVT LegalizedStoredValTy =
14740 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
14741 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
14742 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
14743 SDValue ExtendedStoreVal =
14744 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
14745 LegalizedStoredValTy);
14746 NewStore = DAG.getTruncStore(
14747 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
14748 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
14749 FirstInChain->getAlignment(),
14750 FirstInChain->getMemOperand()->getFlags());
14753 // Replace all merged stores with the new store.
14754 for (unsigned i = 0; i < NumStores; ++i)
14755 CombineTo(StoreNodes[i].MemNode, NewStore);
14757 AddToWorklist(NewChain.getNode());
14758 return true;
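// Illustrative merge (little-endian, values assumed): four consecutive
// i8 stores of 1, 2, 3, 4 at p .. p+3 become the single store
//   store i32 0x04030201, p
// when the i32 store is legal and fast for the target.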
14761 void DAGCombiner::getStoreMergeCandidates(
14762 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
14763 SDNode *&RootNode) {
14764 // This holds the base pointer, index, and the offset in bytes from the base
14765 // pointer.
14766 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
14767 EVT MemVT = St->getMemoryVT();
14769 SDValue Val = peekThroughBitcasts(St->getValue());
14770 // We must have a base and an offset.
14771 if (!BasePtr.getBase().getNode())
14772 return;
14774 // Do not handle stores to undef base pointers.
14775 if (BasePtr.getBase().isUndef())
14776 return;
14778 bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
14779 bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14780 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14781 bool IsLoadSrc = isa<LoadSDNode>(Val);
14782 BaseIndexOffset LBasePtr;
14783 // Match on the load's base pointer, if relevant.
14784 EVT LoadVT;
14785 if (IsLoadSrc) {
14786 auto *Ld = cast<LoadSDNode>(Val);
14787 LBasePtr = BaseIndexOffset::match(Ld, DAG);
14788 LoadVT = Ld->getMemoryVT();
14789 // Load and store should be the same type.
14790 if (MemVT != LoadVT)
14791 return;
14792 // Loads must only have one use.
14793 if (!Ld->hasNUsesOfValue(1, 0))
14794 return;
14795 // The memory operands must not be volatile.
14796 if (Ld->isVolatile() || Ld->isIndexed())
14797 return;
14799 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
14800 int64_t &Offset) -> bool {
14801 if (Other->isVolatile() || Other->isIndexed())
14802 return false;
14803 SDValue Val = peekThroughBitcasts(Other->getValue());
14804 // Allow merging constants of different types as integers.
14805 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
14806 : Other->getMemoryVT() != MemVT;
14807 if (IsLoadSrc) {
14808 if (NoTypeMatch)
14809 return false;
14810 // The Load's Base Ptr must also match
14811 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
14812 auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
14813 if (LoadVT != OtherLd->getMemoryVT())
14814 return false;
14815 // Loads must only have one use.
14816 if (!OtherLd->hasNUsesOfValue(1, 0))
14817 return false;
14818 // The memory operands must not be volatile.
14819 if (OtherLd->isVolatile() || OtherLd->isIndexed())
14820 return false;
14821 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
14822 return false;
14823 } else
14824 return false;
14826 if (IsConstantSrc) {
14827 if (NoTypeMatch)
14828 return false;
14829 if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
14830 return false;
14832 if (IsExtractVecSrc) {
14833 // Do not merge truncated stores here.
14834 if (Other->isTruncatingStore())
14835 return false;
14836 if (!MemVT.bitsEq(Val.getValueType()))
14837 return false;
14838 if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
14839 Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14840 return false;
14842 Ptr = BaseIndexOffset::match(Other, DAG);
14843 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
14846 // We are looking for a root node which is an ancestor to all mergeable
14847 // stores. We search up through a load, to our root and then down
14848 // through all children. For instance we will find Store{1,2,3} if
14849 // St is Store1, Store2, or Store3 where the root is not a load,
14850 // which is always true for nonvolatile ops. TODO: Expand
14851 // the search to find all valid candidates through multiple layers of loads.
14853 // Root
14854 // |-------|-------|
14855 // Load Load Store3
14856 // | |
14857 // Store1 Store2
14859 // FIXME: We should be able to climb and
14860 // descend TokenFactors to find candidates as well.
14862 RootNode = St->getChain().getNode();
14864 unsigned NumNodesExplored = 0;
14865 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
14866 RootNode = Ldn->getChain().getNode();
14867 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
14868 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
14869 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
14870 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
14871 if (I2.getOperandNo() == 0)
14872 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
14873 BaseIndexOffset Ptr;
14874 int64_t PtrDiff;
14875 if (CandidateMatch(OtherST, Ptr, PtrDiff))
14876 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14878 } else
14879 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
14880 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
14881 if (I.getOperandNo() == 0)
14882 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
14883 BaseIndexOffset Ptr;
14884 int64_t PtrDiff;
14885 if (CandidateMatch(OtherST, Ptr, PtrDiff))
14886 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14890 // We need to check that merging these stores does not cause a loop in
14891 // the DAG. Any store candidate may depend on another candidate
14892 // indirectly through its operand (we already consider dependencies
14893 // through the chain). Check in parallel by searching up from
14894 // non-chain operands of candidates.
14895 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
14896 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
14897 SDNode *RootNode) {
14898 // FIXME: We should be able to truncate a full search of
14899 // predecessors by doing a BFS and keeping tabs on the originating
14900 // stores from which worklist nodes come, in a similar way to
14901 // TokenFactor simplification.
14903 SmallPtrSet<const SDNode *, 32> Visited;
14904 SmallVector<const SDNode *, 8> Worklist;
14906 // RootNode is a predecessor to all candidates so we need not search
14907 // past it. Add RootNode (peeking through TokenFactors). Do not count
14908 // these towards size check.
14910 Worklist.push_back(RootNode);
14911 while (!Worklist.empty()) {
14912 auto N = Worklist.pop_back_val();
14913 if (!Visited.insert(N).second)
14914 continue; // Already present in Visited.
14915 if (N->getOpcode() == ISD::TokenFactor) {
14916 for (SDValue Op : N->ops())
14917 Worklist.push_back(Op.getNode());
14921 // Don't count pruning nodes towards max.
14922 unsigned int Max = 1024 + Visited.size();
14923 // Search Ops of store candidates.
14924 for (unsigned i = 0; i < NumStores; ++i) {
14925 SDNode *N = StoreNodes[i].MemNode;
14926 // Of the 4 Store Operands:
14927 // * Chain (Op 0) -> We have already considered these
14928 // in candidate selection and can be
14929 // safely ignored
14930 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
14931 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
14932 // but aren't necessarily from the same base node, so
14933 // cycles possible (e.g. via indexed store).
14934 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
14935 // non-indexed stores). Not constant on all targets (e.g. ARM)
14936 // and so can participate in a cycle.
14937 for (unsigned j = 1; j < N->getNumOperands(); ++j)
14938 Worklist.push_back(N->getOperand(j).getNode());
14940 // Search through DAG. We can stop early if we find a store node.
14941 for (unsigned i = 0; i < NumStores; ++i)
14942 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14943 Max))
14944 return false;
14945 return true;
14948 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14949 if (OptLevel == CodeGenOpt::None)
14950 return false;
14952 EVT MemVT = St->getMemoryVT();
14953 int64_t ElementSizeBytes = MemVT.getStoreSize();
14954 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14956 if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14957 return false;
14959 bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14960 Attribute::NoImplicitFloat);
14962 // This function cannot currently deal with non-byte-sized memory sizes.
14963 if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14964 return false;
14966 if (!MemVT.isSimple())
14967 return false;
14969 // Perform an early exit check. Do not bother looking at stored values that
14970 // are not constants, loads, or extracted vector elements.
14971 SDValue StoredVal = peekThroughBitcasts(St->getValue());
14972 bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14973 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14974 isa<ConstantFPSDNode>(StoredVal);
14975 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14976 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14978 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14979 return false;
14981 SmallVector<MemOpLink, 8> StoreNodes;
14982 SDNode *RootNode;
14983 // Find potential store merge candidates by searching through chain sub-DAG
14984 getStoreMergeCandidates(St, StoreNodes, RootNode);
14986 // Check if there is anything to merge.
14987 if (StoreNodes.size() < 2)
14988 return false;
14990 // Sort the memory operands according to their distance from the
14991 // base pointer.
14992 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
14993 return LHS.OffsetFromBase < RHS.OffsetFromBase;
14996 // Store Merge attempts to merge the lowest stores first. This generally
14997 // works out, because if a merge succeeds the remaining stores are
14998 // checked after the first collection of stores is merged. However, in the
14999 // case that a non-mergeable store is found first, e.g., {p[-2],
15000 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15001 // mergeable cases. To prevent this, we prune such stores from the
15002 // front of StoreNodes here.
15004 bool RV = false;
15005 while (StoreNodes.size() > 1) {
15006 unsigned StartIdx = 0;
15007 while ((StartIdx + 1 < StoreNodes.size()) &&
15008 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15009 StoreNodes[StartIdx + 1].OffsetFromBase)
15010 ++StartIdx;
15012 // Bail if we don't have enough candidates to merge.
15013 if (StartIdx + 1 >= StoreNodes.size())
15014 return RV;
15016 if (StartIdx)
15017 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15019 // Scan the memory operations on the chain and find the first
15020 // non-consecutive store memory address.
15021 unsigned NumConsecutiveStores = 1;
15022 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15023 // Check that the addresses are consecutive starting from the second
15024 // element in the list of stores.
15025 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15026 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15027 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15028 break;
15029 NumConsecutiveStores = i + 1;
15032 if (NumConsecutiveStores < 2) {
15033 StoreNodes.erase(StoreNodes.begin(),
15034 StoreNodes.begin() + NumConsecutiveStores);
15035 continue;
15038 // The node with the lowest store address.
15039 LLVMContext &Context = *DAG.getContext();
15040 const DataLayout &DL = DAG.getDataLayout();
15042 // Store the constants into memory as one consecutive store.
15043 if (IsConstantSrc) {
15044 while (NumConsecutiveStores >= 2) {
15045 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15046 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15047 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15048 unsigned LastLegalType = 1;
15049 unsigned LastLegalVectorType = 1;
15050 bool LastIntegerTrunc = false;
15051 bool NonZero = false;
15052 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15053 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15054 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15055 SDValue StoredVal = ST->getValue();
15056 bool IsElementZero = false;
15057 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15058 IsElementZero = C->isNullValue();
15059 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15060 IsElementZero = C->getConstantFPValue()->isNullValue();
15061 if (IsElementZero) {
15062 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15063 FirstZeroAfterNonZero = i;
15064 }
15065 NonZero |= !IsElementZero;
15067 // Find a legal type for the constant store.
15068 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15069 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15070 bool IsFast = false;
15072 // Break early when size is too large to be legal.
15073 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15074 break;
15076 if (TLI.isTypeLegal(StoreTy) &&
15077 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15078 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15079 FirstStoreAlign, &IsFast) &&
15080 IsFast) {
15081 LastIntegerTrunc = false;
15082 LastLegalType = i + 1;
15083 // Or check whether a truncstore is legal.
15084 } else if (TLI.getTypeAction(Context, StoreTy) ==
15085 TargetLowering::TypePromoteInteger) {
15086 EVT LegalizedStoredValTy =
15087 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15088 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15089 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15090 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15091 FirstStoreAlign, &IsFast) &&
15092 IsFast) {
15093 LastIntegerTrunc = true;
15094 LastLegalType = i + 1;
15095 }
15096 }
15098 // We only use vectors if the constant is known to be zero or the
15099 // target allows it and the function is not marked with the
15100 // noimplicitfloat attribute.
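// Illustrative case (assuming v4f32 is legal and the target reports the
// vector store as cheap): four consecutive 'store float 0.0' operations
// can become a single 'store <4 x float> zeroinitializer'.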
15101 if ((!NonZero ||
15102 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15103 !NoVectors) {
15104 // Find a legal type for the vector store.
15105 unsigned Elts = (i + 1) * NumMemElts;
15106 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15107 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15108 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15109 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
15110 FirstStoreAlign, &IsFast) &&
15111 IsFast)
15112 LastLegalVectorType = i + 1;
15113 }
15114 }
15116 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15117 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15119 // Check if we found a legal integer type that creates a meaningful
15120 // merge.
15121 if (NumElem < 2) {
15122 // We know that candidate stores are in order and of correct
15123 // shape. While there is no mergeable sequence from the
15124 // beginning one may start later in the sequence. The only
15125 // reason a merge of size N could have failed where another of
15126 // the same size would not have, is if the alignment has
15127 // improved or we've dropped a non-zero value. Drop as many
15128 // candidates as we can here.
15129 unsigned NumSkip = 1;
15130 while (
15131 (NumSkip < NumConsecutiveStores) &&
15132 (NumSkip < FirstZeroAfterNonZero) &&
15133 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15134 NumSkip++;
15136 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15137 NumConsecutiveStores -= NumSkip;
15138 continue;
15139 }
15141 // Check that we can merge these candidates without causing a cycle.
15142 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15143 RootNode)) {
15144 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15145 NumConsecutiveStores -= NumElem;
15146 continue;
15147 }
15149 RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15150 UseVector, LastIntegerTrunc);
15152 // Remove merged stores for next iteration.
15153 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15154 NumConsecutiveStores -= NumElem;
15155 }
15156 continue;
15157 }
15159 // When extracting multiple vector elements, try to store them
15160 // in one vector store rather than a sequence of scalar stores.
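// Sketch (assuming v2i64 is legal here): two scalar stores of
// extract_vector_elt lanes 0 and 1 of the same v2i64 source can become
// one v2i64 store built from the extracted elements.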
15161 if (IsExtractVecSrc) {
15162 // Loop over the consecutive stores while merging succeeds.
15163 while (NumConsecutiveStores >= 2) {
15164 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15165 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15166 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15167 unsigned NumStoresToMerge = 1;
15168 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15169 // Find a legal type for the vector store.
15170 unsigned Elts = (i + 1) * NumMemElts;
15171 EVT Ty =
15172 EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15173 bool IsFast;
15175 // Break early when size is too large to be legal.
15176 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15177 break;
15179 if (TLI.isTypeLegal(Ty) &&
15180 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15181 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
15182 FirstStoreAlign, &IsFast) &&
15183 IsFast)
15184 NumStoresToMerge = i + 1;
15185 }
15187 // Check if we found a legal vector type that creates a meaningful
15188 // merge.
15189 if (NumStoresToMerge < 2) {
15190 // We know that candidate stores are in order and of correct
15191 // shape. While there is no mergeable sequence from the
15192 // beginning one may start later in the sequence. The only
15193 // reason a merge of size N could have failed where another of
15194 // the same size would not have, is if the alignment has
15195 // improved. Drop as many candidates as we can here.
15196 unsigned NumSkip = 1;
15197 while (
15198 (NumSkip < NumConsecutiveStores) &&
15199 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15200 NumSkip++;
15202 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15203 NumConsecutiveStores -= NumSkip;
15204 continue;
15205 }
15207 // Check that we can merge these candidates without causing a cycle.
15208 if (!checkMergeStoreCandidatesForDependencies(
15209 StoreNodes, NumStoresToMerge, RootNode)) {
15210 StoreNodes.erase(StoreNodes.begin(),
15211 StoreNodes.begin() + NumStoresToMerge);
15212 NumConsecutiveStores -= NumStoresToMerge;
15213 continue;
15214 }
15216 RV |= MergeStoresOfConstantsOrVecElts(
15217 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15219 StoreNodes.erase(StoreNodes.begin(),
15220 StoreNodes.begin() + NumStoresToMerge);
15221 NumConsecutiveStores -= NumStoresToMerge;
15222 }
15223 continue;
15224 }
15226 // Below we handle the case of multiple consecutive stores that
15227 // come from multiple consecutive loads. We merge them into a single
15228 // wide load and a single wide store.
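// Sketch of the rewrite (assuming i64 is legal and fast for both the
// load and the store):
//   a = load i32, p;  b = load i32, p+4
//   store i32 a, q;   store i32 b, q+4
//     --> w = load i64, p;  store i64 w, q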
15230 // Look for load nodes which are used by the stored values.
15231 SmallVector<MemOpLink, 8> LoadNodes;
15233 // Find acceptable loads. Loads must share the same chain (token factor),
15234 // must not be zext, volatile, or indexed, and they must be consecutive.
15235 BaseIndexOffset LdBasePtr;
15237 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15238 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15239 SDValue Val = peekThroughBitcasts(St->getValue());
15240 LoadSDNode *Ld = cast<LoadSDNode>(Val);
15242 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15243 // If this is not the first ptr that we check.
15244 int64_t LdOffset = 0;
15245 if (LdBasePtr.getBase().getNode()) {
15246 // The base ptr must be the same.
15247 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15248 break;
15249 } else {
15250 // Check that all other base pointers are the same as this one.
15251 LdBasePtr = LdPtr;
15252 }
15254 // We found a potential memory operand to merge.
15255 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15256 }
15258 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15259 // If we have load/store pair instructions and we only have two values,
15260 // don't bother merging.
15261 unsigned RequiredAlignment;
15262 if (LoadNodes.size() == 2 &&
15263 TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15264 StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15265 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15266 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15267 break;
15268 }
15269 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15270 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15271 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15272 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15273 unsigned FirstLoadAS = FirstLoad->getAddressSpace();
15274 unsigned FirstLoadAlign = FirstLoad->getAlignment();
15276 // Scan the memory operations on the chain and find the first
15277 // non-consecutive load memory address. These variables hold the index
15278 // into the load node array.
15280 unsigned LastConsecutiveLoad = 1;
15282 // These variables refer to sizes, not indices, in the array.
15283 unsigned LastLegalVectorType = 1;
15284 unsigned LastLegalIntegerType = 1;
15285 bool isDereferenceable = true;
15286 bool DoIntegerTruncate = false;
15287 StartAddress = LoadNodes[0].OffsetFromBase;
15288 SDValue FirstChain = FirstLoad->getChain();
15289 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15290 // All loads must share the same chain.
15291 if (LoadNodes[i].MemNode->getChain() != FirstChain)
15292 break;
15294 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15295 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15296 break;
15297 LastConsecutiveLoad = i;
15299 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15300 isDereferenceable = false;
15302 // Find a legal type for the vector store.
15303 unsigned Elts = (i + 1) * NumMemElts;
15304 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15306 // Break early when size is too large to be legal.
15307 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15308 break;
15310 bool IsFastSt, IsFastLd;
15311 if (TLI.isTypeLegal(StoreTy) &&
15312 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15313 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15314 FirstStoreAlign, &IsFastSt) &&
15315 IsFastSt &&
15316 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15317 FirstLoadAlign, &IsFastLd) &&
15318 IsFastLd) {
15319 LastLegalVectorType = i + 1;
15320 }
15322 // Find a legal type for the integer store.
15323 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15324 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15325 if (TLI.isTypeLegal(StoreTy) &&
15326 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15327 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15328 FirstStoreAlign, &IsFastSt) &&
15329 IsFastSt &&
15330 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15331 FirstLoadAlign, &IsFastLd) &&
15332 IsFastLd) {
15333 LastLegalIntegerType = i + 1;
15334 DoIntegerTruncate = false;
15335 // Or check whether a truncstore and extload is legal.
15336 } else if (TLI.getTypeAction(Context, StoreTy) ==
15337 TargetLowering::TypePromoteInteger) {
15338 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15339 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15340 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15341 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15342 StoreTy) &&
15343 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15344 StoreTy) &&
15345 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15346 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15347 FirstStoreAlign, &IsFastSt) &&
15348 IsFastSt &&
15349 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15350 FirstLoadAlign, &IsFastLd) &&
15351 IsFastLd) {
15352 LastLegalIntegerType = i + 1;
15353 DoIntegerTruncate = true;
15354 }
15355 }
15356 }
15358 // Only use vector types if the vector type is larger than the integer
15359 // type. If they are the same, use integers.
15360 bool UseVectorTy =
15361 LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15362 unsigned LastLegalType =
15363 std::max(LastLegalVectorType, LastLegalIntegerType);
15365 // We add +1 here because the LastXXX variables are indices (locations
15366 // in the array) while NumElem is a count of elements.
15367 unsigned NumElem =
15368 std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15369 NumElem = std::min(LastLegalType, NumElem);
15371 if (NumElem < 2) {
15372 // We know that candidate stores are in order and of correct
15373 // shape. While there is no mergeable sequence from the
15374 // beginning one may start later in the sequence. The only
15375 // reason a merge of size N could have failed where another of
15376 // the same size would not have is if the alignment or either
15377 // the load or store has improved. Drop as many candidates as we
15378 // can here.
15379 unsigned NumSkip = 1;
15380 while ((NumSkip < LoadNodes.size()) &&
15381 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15382 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15383 NumSkip++;
15384 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15385 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15386 NumConsecutiveStores -= NumSkip;
15387 continue;
15388 }
15390 // Check that we can merge these candidates without causing a cycle.
15391 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15392 RootNode)) {
15393 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15394 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15395 NumConsecutiveStores -= NumElem;
15396 continue;
15397 }
15399 // Find if it is better to use vectors or integers to load and store
15400 // to memory.
15401 EVT JointMemOpVT;
15402 if (UseVectorTy) {
15403 // Find a legal type for the vector store.
15404 unsigned Elts = NumElem * NumMemElts;
15405 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15406 } else {
15407 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15408 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15409 }
15411 SDLoc LoadDL(LoadNodes[0].MemNode);
15412 SDLoc StoreDL(StoreNodes[0].MemNode);
15414 // The merged loads are required to have the same incoming chain, so
15415 // using the first's chain is acceptable.
15417 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
15418 AddToWorklist(NewStoreChain.getNode());
15420 MachineMemOperand::Flags MMOFlags =
15421 isDereferenceable ? MachineMemOperand::MODereferenceable
15422 : MachineMemOperand::MONone;
15424 SDValue NewLoad, NewStore;
15425 if (UseVectorTy || !DoIntegerTruncate) {
15426 NewLoad =
15427 DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15428 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15429 FirstLoadAlign, MMOFlags);
15430 NewStore = DAG.getStore(
15431 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15432 FirstInChain->getPointerInfo(), FirstStoreAlign);
15433 } else { // This must be the truncstore/extload case
15434 EVT ExtendedTy =
15435 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15436 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15437 FirstLoad->getChain(), FirstLoad->getBasePtr(),
15438 FirstLoad->getPointerInfo(), JointMemOpVT,
15439 FirstLoadAlign, MMOFlags);
15440 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15441 FirstInChain->getBasePtr(),
15442 FirstInChain->getPointerInfo(),
15443 JointMemOpVT, FirstInChain->getAlignment(),
15444 FirstInChain->getMemOperand()->getFlags());
15445 }
15447 // Transfer chain users from old loads to the new load.
15448 for (unsigned i = 0; i < NumElem; ++i) {
15449 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15450 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15451 SDValue(NewLoad.getNode(), 1));
15452 }
15454 // Replace all the stores with the new store. Recursively remove the
15455 // corresponding values if they are no longer used.
15456 for (unsigned i = 0; i < NumElem; ++i) {
15457 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15458 CombineTo(StoreNodes[i].MemNode, NewStore);
15459 if (Val.getNode()->use_empty())
15460 recursivelyDeleteUnusedNodes(Val.getNode());
15461 }
15463 RV = true;
15464 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15465 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15466 NumConsecutiveStores -= NumElem;
15467 }
15468 }
15469 return RV;
15470 }
15472 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15473 SDLoc SL(ST);
15474 SDValue ReplStore;
15476 // Replace the chain to avoid dependency.
15477 if (ST->isTruncatingStore()) {
15478 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
15479 ST->getBasePtr(), ST->getMemoryVT(),
15480 ST->getMemOperand());
15481 } else {
15482 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
15483 ST->getMemOperand());
15484 }
15486 // Create token to keep both nodes around.
15487 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
15488 MVT::Other, ST->getChain(), ReplStore);
15490 // Make sure the new and old chains are cleaned up.
15491 AddToWorklist(Token.getNode());
15493 // Don't add users to work list.
15494 return CombineTo(ST, Token, false);
15495 }
15497 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
15498 SDValue Value = ST->getValue();
15499 if (Value.getOpcode() == ISD::TargetConstantFP)
15500 return SDValue();
15502 SDLoc DL(ST);
15504 SDValue Chain = ST->getChain();
15505 SDValue Ptr = ST->getBasePtr();
15507 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
15509 // NOTE: If the original store is volatile, this transform must not increase
15510 // the number of stores. For example, on x86-32 an f64 can be stored in one
15511 // processor operation but an i64 (which is not legal) requires two. So the
15512 // transform should not be done in this case.
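// The basic rewrite, shown on a small example:
//   store float 1.0, p  -->  store i32 0x3F800000, p
// (0x3F800000 is the IEEE-754 single-precision bit pattern of 1.0).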
15514 SDValue Tmp;
15515 switch (CFP->getSimpleValueType(0).SimpleTy) {
15516 default:
15517 llvm_unreachable("Unknown FP type");
15518 case MVT::f16: // We don't do this for these yet.
15519 case MVT::f80:
15520 case MVT::f128:
15521 case MVT::ppcf128:
15522 return SDValue();
15523 case MVT::f32:
15524 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
15525 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15527 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
15528 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
15529 MVT::i32);
15530 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
15531 }
15533 return SDValue();
15534 case MVT::f64:
15535 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
15536 !ST->isVolatile()) ||
15537 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
15539 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
15540 getZExtValue(), SDLoc(CFP), MVT::i64);
15541 return DAG.getStore(Chain, DL, Tmp,
15542 Ptr, ST->getMemOperand());
15543 }
15545 if (!ST->isVolatile() &&
15546 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15547 // Many FP stores are not made apparent until after legalize, e.g. for
15548 // argument passing. Since this is so common, custom legalize the
15549 // 64-bit integer store into two 32-bit stores.
15550 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
15551 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
15552 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
15553 if (DAG.getDataLayout().isBigEndian())
15554 std::swap(Lo, Hi);
15556 unsigned Alignment = ST->getAlignment();
15557 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15558 AAMDNodes AAInfo = ST->getAAInfo();
15560 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15561 ST->getAlignment(), MMOFlags, AAInfo);
15562 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15563 DAG.getConstant(4, DL, Ptr.getValueType()));
15564 Alignment = MinAlign(Alignment, 4U);
15565 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
15566 ST->getPointerInfo().getWithOffset(4),
15567 Alignment, MMOFlags, AAInfo);
15568 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
15569 St0, St1);
15570 }
15572 return SDValue();
15573 }
15574 }
15576 SDValue DAGCombiner::visitSTORE(SDNode *N) {
15577 StoreSDNode *ST = cast<StoreSDNode>(N);
15578 SDValue Chain = ST->getChain();
15579 SDValue Value = ST->getValue();
15580 SDValue Ptr = ST->getBasePtr();
15582 // If this is a store of a bit convert, store the input value if the
15583 // resultant store does not need a higher alignment than the original.
15584 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
15585 ST->isUnindexed()) {
15586 EVT SVT = Value.getOperand(0).getValueType();
15587 // If the store is volatile, we only want to change the store type if the
15588 // resulting store is legal. Otherwise we might increase the number of
15589 // memory accesses. We don't care if the original type was legal or not
15590 // as we assume software couldn't rely on the number of accesses of an
15591 // illegal type.
15592 if (((!LegalOperations && !ST->isVolatile()) ||
15593 TLI.isOperationLegal(ISD::STORE, SVT)) &&
15594 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
15595 unsigned OrigAlign = ST->getAlignment();
15596 bool Fast = false;
15597 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
15598 ST->getAddressSpace(), OrigAlign, &Fast) &&
15599 Fast) {
15600 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
15601 ST->getPointerInfo(), OrigAlign,
15602 ST->getMemOperand()->getFlags(), ST->getAAInfo());
15603 }
15604 }
15605 }
15607 // Turn 'store undef, Ptr' -> nothing.
15608 if (Value.isUndef() && ST->isUnindexed())
15609 return Chain;
15611 // Try to infer better alignment information than the store already has.
15612 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
15613 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
15614 if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
15615 SDValue NewStore =
15616 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
15617 ST->getMemoryVT(), Align,
15618 ST->getMemOperand()->getFlags(), ST->getAAInfo());
15619 // NewStore will always be N, as we are only refining the alignment.
15620 assert(NewStore.getNode() == N);
15621 (void)NewStore;
15622 }
15623 }
15624 }
15626 // Try transforming a pair of floating-point load / store ops to
15627 // integer load / store ops.
15628 if (SDValue NewST = TransformFPLoadStorePair(N))
15629 return NewST;
15631 if (ST->isUnindexed()) {
15632 // Walk up chain skipping non-aliasing memory nodes, on this store and any
15633 // adjacent stores.
15634 if (findBetterNeighborChains(ST)) {
15635 // replaceStoreChain uses CombineTo, which handles all of the worklist
15636 // manipulation. Return the original node so nothing else is done.
15637 return SDValue(ST, 0);
15638 }
15639 Chain = ST->getChain();
15640 }
15642 // FIXME: is there such a thing as a truncating indexed store?
15643 if (ST->isTruncatingStore() && ST->isUnindexed() &&
15644 Value.getValueType().isInteger() &&
15645 (!isa<ConstantSDNode>(Value) ||
15646 !cast<ConstantSDNode>(Value)->isOpaque())) {
15647 APInt TruncDemandedBits =
15648 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15649 ST->getMemoryVT().getScalarSizeInBits());
15651 // See if we can simplify the input to this truncstore with knowledge that
15652 // only the low bits are being used. For example:
15653 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
15654 SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
15655 AddToWorklist(Value.getNode());
15656 if (Shorter)
15657 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
15658 ST->getMemOperand());
15660 // Otherwise, see if we can simplify the operation with
15661 // SimplifyDemandedBits, which only works if the value has a single use.
15662 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
15663 // Re-visit the store if anything changed and the store hasn't been
15664 // merged with another node (in which case N is deleted).
15665 // SimplifyDemandedBits will add Value's node back to the worklist if
15666 // necessary, but we also need to re-visit the Store node itself.
15667 if (N->getOpcode() != ISD::DELETED_NODE)
15668 AddToWorklist(N);
15669 return SDValue(N, 0);
15670 }
15671 }
15673 // If this is a load followed by a store to the same location, then the store
15674 // is dead/noop.
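// For example:
//   x = load i32, p
//   store i32 x, p   ; removable when nothing on the chain between the
//                    ; load and the store can write to p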
15675 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
15676 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
15677 ST->isUnindexed() && !ST->isVolatile() &&
15678 // There can't be any side effects between the load and store, such as
15679 // a call or store.
15680 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
15681 // The store is dead, remove it.
15682 return Chain;
15683 }
15684 }
15686 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
15687 if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
15688 !ST1->isVolatile()) {
15689 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
15690 ST->getMemoryVT() == ST1->getMemoryVT()) {
15691 // If this is a store followed by a store with the same value to the
15692 // same location, then the store is dead/noop.
15693 return Chain;
15694 }
15696 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
15697 !ST1->getBasePtr().isUndef()) {
15698 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
15699 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
15700 unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
15701 unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
15702 // If the preceding store (ST1) writes to a subset of the current store's
15703 // location and no other node is chained to that store, the preceding
15704 // store is dead and we can effectively drop it. Do not remove stores to
15705 // undef as they may be used as data sinks.
15706 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
15707 CombineTo(ST1, ST1->getChain());
15708 return SDValue();
15709 }
15711 // If ST stores to a subset of preceding store's write set, we may be
15712 // able to fold ST's value into the preceding stored value. As we know
15713 // the other uses of ST1's chain are unconcerned with ST, this folding
15714 // will not affect those nodes.
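// A little-endian illustration of this folding:
//   store i32 0x11223344, p;  store i8 0x55, p
//     --> store i32 0x11223355, p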
15715 int64_t BitOffset;
15716 if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
15717 BitOffset)) {
15718 SDValue ChainValue = ST1->getValue();
15719 if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
15720 if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
15721 APInt Val = C1->getAPIntValue();
15722 APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
15723 // FIXME: Handle Big-endian mode.
15724 if (!DAG.getDataLayout().isBigEndian()) {
15725 Val.insertBits(InsertVal, BitOffset);
15726 SDValue NewSDVal =
15727 DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
15728 C1->isTargetOpcode(), C1->isOpaque());
15729 SDNode *NewST1 = DAG.UpdateNodeOperands(
15730 ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
15731 ST1->getOperand(3));
15732 return CombineTo(ST, SDValue(NewST1, 0));
15733 }
15734 }
15735 }
15736 } // End ST subset of ST1 case.
15737 }
15738 }
15739 }
15741 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
15742 // truncating store. We can do this even if this is already a truncstore.
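// For instance:
//   store (i16 (trunc i32 X)), p  -->  truncstore i32 X, p, i16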
15743 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
15744 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
15745 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
15746 ST->getMemoryVT())) {
15747 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
15748 Ptr, ST->getMemoryVT(), ST->getMemOperand());
15749 }
15751 // Always perform this optimization before types are legal. If the target
15752 // prefers, also try this after legalization to catch stores that were created
15753 // by intrinsics or other nodes.
15754 if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
15755 while (true) {
15756 // There can be multiple store sequences on the same chain.
15757 // Keep trying to merge store sequences until we are unable to do so
15758 // or until we merge the last store on the chain.
15759 bool Changed = MergeConsecutiveStores(ST);
15760 if (!Changed) break;
15761 // Return N as merge only uses CombineTo and no worklist clean
15762 // up is necessary.
15763 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
15764 return SDValue(N, 0);
15765 }
15766 }
15768 // Try transforming N to an indexed store.
15769 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15770 return SDValue(N, 0);
15772 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
15774 // Make sure to do this only after attempting to merge stores in order to
15775 // avoid changing the types of some subset of stores due to visit order,
15776 // preventing their merging.
15777 if (isa<ConstantFPSDNode>(ST->getValue())) {
15778 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
15779 return NewSt;
15780 }
15782 if (SDValue NewSt = splitMergedValStore(ST))
15783 return NewSt;
15785 return ReduceLoadOpStoreWidth(N);
15786 }
15788 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
15789 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
15790 if (!LifetimeEnd->hasOffset())
15791 return SDValue();
15793 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
15794 LifetimeEnd->getOffset(), false);
15796 // We walk up the chains to find stores.
15797 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
15798 while (!Chains.empty()) {
15799 SDValue Chain = Chains.back();
15800 Chains.pop_back();
15801 if (!Chain.hasOneUse())
15802 continue;
15803 switch (Chain.getOpcode()) {
15804 case ISD::TokenFactor:
15805 for (unsigned Nops = Chain.getNumOperands(); Nops;)
15806 Chains.push_back(Chain.getOperand(--Nops));
15807 break;
15808 case ISD::LIFETIME_START:
15809 case ISD::LIFETIME_END:
15810 // We can forward past any lifetime start/end that can be proven not to
15811 // alias the node.
15812 if (!isAlias(Chain.getNode(), N))
15813 Chains.push_back(Chain.getOperand(0));
15814 break;
15815 case ISD::STORE: {
15816 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
15817 if (ST->isVolatile() || ST->isIndexed())
15818 continue;
15819 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
15820 // If we store purely within object bounds just before its lifetime ends,
15821 // we can remove the store.
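// e.g., a store that only initializes %p immediately before a
// LIFETIME_END of %p can never be observed, so it is dead.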
15822 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
15823 ST->getMemoryVT().getStoreSizeInBits())) {
15824 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
15825 dbgs() << "\nwithin LIFETIME_END of : ";
15826 LifetimeEndBase.dump(); dbgs() << "\n");
15827 CombineTo(ST, ST->getChain());
15828 return SDValue(N, 0);
15829 }
15830 }
15831 }
15832 }
15833 return SDValue();
15834 }
15836 /// For the store instruction sequence below, the F and I values
15837 /// are bundled together as an i64 value before being stored into memory.
15838 /// Sometimes it is more efficient to generate separate stores for F and I,
15839 /// which can remove the bitwise instructions or sink them to colder places.
15841 /// (store (or (zext (bitcast F to i32) to i64),
15842 /// (shl (zext I to i64), 32)), addr) -->
15843 /// (store F, addr) and (store I, addr+4)
15845 /// Similarly, splitting other merged stores can also be beneficial, e.g.:
15846 /// For pair of {i32, i32}, i64 store --> two i32 stores.
15847 /// For pair of {i32, i16}, i64 store --> two i32 stores.
15848 /// For pair of {i16, i16}, i32 store --> two i16 stores.
15849 /// For pair of {i16, i8}, i32 store --> two i16 stores.
15850 /// For pair of {i8, i8}, i16 store --> two i8 stores.
15852 /// We allow each target to determine specifically which kind of splitting is
15853 /// supported.
15855 /// The store patterns are commonly seen in the simple code snippet below
15856 /// when only std::make_pair(...) is SROA-transformed before being inlined into hoo().
15857 /// void goo(const std::pair<int, float> &);
15858 /// hoo() {
15859 /// ...
15860 /// goo(std::make_pair(tmp, ftmp));
15861 /// ...
15862 /// }
15864 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15865 if (OptLevel == CodeGenOpt::None)
15866 return SDValue();
15868 SDValue Val = ST->getValue();
15869 SDLoc DL(ST);
15871 // Match OR operand.
15872 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15873 return SDValue();
15875 // Match SHL operand and get Lower and Higher parts of Val.
15876 SDValue Op1 = Val.getOperand(0);
15877 SDValue Op2 = Val.getOperand(1);
15878 SDValue Lo, Hi;
15879 if (Op1.getOpcode() != ISD::SHL) {
15880 std::swap(Op1, Op2);
15881 if (Op1.getOpcode() != ISD::SHL)
15882 return SDValue();
15883 }
15884 Lo = Op2;
15885 Hi = Op1.getOperand(0);
15886 if (!Op1.hasOneUse())
15887 return SDValue();
15889 // Match shift amount to HalfValBitSize.
15890 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15891 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15892 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15893 return SDValue();
15895 // Lo and Hi are zero-extended from integers whose size is at most
15896 // HalfValBitSize (e.g., from i32 to i64).
15897 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15898 !Lo.getOperand(0).getValueType().isScalarInteger() ||
15899 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15900 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15901 !Hi.getOperand(0).getValueType().isScalarInteger() ||
15902 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15903 return SDValue();
15905 // Use the EVT of low and high parts before bitcast as the input
15906 // of target query.
15907 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15908 ? Lo.getOperand(0).getValueType()
15909 : Lo.getValueType();
15910 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
15911 ? Hi.getOperand(0).getValueType()
15912 : Hi.getValueType();
15913 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
15914 return SDValue();
15916 // Start to split store.
15917 unsigned Alignment = ST->getAlignment();
15918 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15919 AAMDNodes AAInfo = ST->getAAInfo();
15921 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
15922 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
15923 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
15924 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
15926 SDValue Chain = ST->getChain();
15927 SDValue Ptr = ST->getBasePtr();
15928 // Lower value store.
15929 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15930 ST->getAlignment(), MMOFlags, AAInfo);
15931 Ptr =
15932 DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15933 DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
15934 // Higher value store.
15935 SDValue St1 =
15936 DAG.getStore(St0, DL, Hi, Ptr,
15937 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
15938 Alignment / 2, MMOFlags, AAInfo);
15939 return St1;
15940 }
15942 /// Convert a disguised subvector insertion into a shuffle:
15943 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
15944 /// bitcast(shuffle (bitcast V), (extended X), Mask)
15945 /// Note: We do not use an insert_subvector node because that requires a legal
15946 /// subvector type.
15947 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
15948 SDValue InsertVal = N->getOperand(1);
15949 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
15950 !InsertVal.getOperand(0).getValueType().isVector())
15951 return SDValue();
15953 SDValue SubVec = InsertVal.getOperand(0);
15954 SDValue DestVec = N->getOperand(0);
15955 EVT SubVecVT = SubVec.getValueType();
15956 EVT VT = DestVec.getValueType();
15957 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
15958 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
15959 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
15961 // Step 1: Create a shuffle mask that implements this insert operation. The
15962 // vector that we are inserting into will be operand 0 of the shuffle, so
15963 // those elements are just 'i'. The inserted subvector is in the first
15964 // positions of operand 1 of the shuffle. Example:
15965 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
15966 SmallVector<int, 16> Mask(NumMaskVals);
15967 for (unsigned i = 0; i != NumMaskVals; ++i) {
15968 if (i / NumSrcElts == InsIndex)
15969 Mask[i] = (i % NumSrcElts) + NumMaskVals;
15970 else
15971 Mask[i] = i;
15972 }
15974 // Bail out if the target cannot handle the shuffle we want to create.
15975 EVT SubVecEltVT = SubVecVT.getVectorElementType();
15976 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
15977 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
15978 return SDValue();
15980 // Step 2: Create a wide vector from the inserted source vector by appending
15981 // undefined elements. This is the same size as our destination vector.
15982 SDLoc DL(N);
15983 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
15984 ConcatOps[0] = SubVec;
15985 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15987 // Step 3: Shuffle in the padded subvector.
15988 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15989 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
15990 AddToWorklist(PaddedSubV.getNode());
15991 AddToWorklist(DestVecBC.getNode());
15992 AddToWorklist(Shuf.getNode());
15993 return DAG.getBitcast(VT, Shuf);
15994 }
15996 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
15997 SDValue InVec = N->getOperand(0);
15998 SDValue InVal = N->getOperand(1);
15999 SDValue EltNo = N->getOperand(2);
16000 SDLoc DL(N);
16002 // If the inserted element is an UNDEF, just use the input vector.
16003 if (InVal.isUndef())
16004 return InVec;
16006 EVT VT = InVec.getValueType();
16007 unsigned NumElts = VT.getVectorNumElements();
16009 // Remove redundant insertions:
16010 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16011 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16012 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16013 return InVec;
16015 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16016 if (!IndexC) {
16017 // If this is variable insert to undef vector, it might be better to splat:
16018 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16019 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16020 SmallVector<SDValue, 8> Ops(NumElts, InVal);
16021 return DAG.getBuildVector(VT, DL, Ops);
16022 }
16023 return SDValue();
16024 }
16026 // We must know which element is being inserted for folds below here.
16027 unsigned Elt = IndexC->getZExtValue();
16028 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16029 return Shuf;
16031 // Canonicalize insert_vector_elt dag nodes.
16032 // Example:
16033 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16034 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16036 // Do this only if the child insert_vector node has one use; also
16037 // do this only if indices are both constants and Idx1 < Idx0.
16038 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16039 && isa<ConstantSDNode>(InVec.getOperand(2))) {
16040 unsigned OtherElt = InVec.getConstantOperandVal(2);
16041 if (Elt < OtherElt) {
16042 // Swap nodes.
16043 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16044 InVec.getOperand(0), InVal, EltNo);
16045 AddToWorklist(NewOp.getNode());
16046 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16047 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16048 }
16049 }
16051 // If we can't generate a legal BUILD_VECTOR, exit
16052 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16053 return SDValue();
16055 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16056 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
16057 // vector elements.
16058 SmallVector<SDValue, 8> Ops;
16059 // Do not combine these two vectors if the output vector will not replace
16060 // the input vector.
16061 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16062 Ops.append(InVec.getNode()->op_begin(),
16063 InVec.getNode()->op_end());
16064 } else if (InVec.isUndef()) {
16065 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16066 } else {
16067 return SDValue();
16068 }
16069 assert(Ops.size() == NumElts && "Unexpected vector size");
16071 // Insert the element
16072 if (Elt < Ops.size()) {
16073 // All the operands of BUILD_VECTOR must have the same type;
16074 // we enforce that here.
16075 EVT OpVT = Ops[0].getValueType();
16076 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16077 }
16079 // Return the new vector
16080 return DAG.getBuildVector(VT, DL, Ops);
16081 }
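/// Replace an extract of a vector element that is fed by a vector load
/// with a narrow scalar load of just that element, e.g. (illustrative):
///   (extractelt (load v4i32, $addr), 2) --> (load i32, $addr + 8)
/// provided the narrow load remains legal and sufficiently aligned.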
16083 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16084 SDValue EltNo,
16085 LoadSDNode *OriginalLoad) {
16086 assert(!OriginalLoad->isVolatile());
16088 EVT ResultVT = EVE->getValueType(0);
16089 EVT VecEltVT = InVecVT.getVectorElementType();
16090 unsigned Align = OriginalLoad->getAlignment();
16091 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16092 VecEltVT.getTypeForEVT(*DAG.getContext()));
16094 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
16095 return SDValue();
16097 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16098 ISD::NON_EXTLOAD : ISD::EXTLOAD;
16099 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16100 return SDValue();
16102 Align = NewAlign;
16104 SDValue NewPtr = OriginalLoad->getBasePtr();
16105 SDValue Offset;
16106 EVT PtrType = NewPtr.getValueType();
16107 MachinePointerInfo MPI;
16108 SDLoc DL(EVE);
16109 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
16110 int Elt = ConstEltNo->getZExtValue();
16111 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16112 Offset = DAG.getConstant(PtrOff, DL, PtrType);
16113 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16114 } else {
16115 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16116 Offset = DAG.getNode(
16117 ISD::MUL, DL, PtrType, Offset,
16118 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16119 // Discard the pointer info except the address space because the memory
16120 // operand can't represent this new access since the offset is variable.
16121 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16122 }
16123 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16125 // The replacement we need to do here is a little tricky: we need to
16126 // replace an extractelement of a load with a load.
16127 // Use ReplaceAllUsesOfValuesWith to do the replacement.
16128 // Note that this replacement assumes that the extractelement is the only
16129 // use of the load; that's okay because we don't want to perform this
16130 // transformation in other cases anyway.
16131 SDValue Load;
16132 SDValue Chain;
16133 if (ResultVT.bitsGT(VecEltVT)) {
16134 // If the result type of vextract is wider than the load, then issue an
16135 // extending load instead.
16136 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16137 VecEltVT)
16138 ? ISD::ZEXTLOAD
16139 : ISD::EXTLOAD;
16140 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16141 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16142 Align, OriginalLoad->getMemOperand()->getFlags(),
16143 OriginalLoad->getAAInfo());
16144 Chain = Load.getValue(1);
16145 } else {
16146 Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16147 MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16148 OriginalLoad->getAAInfo());
16149 Chain = Load.getValue(1);
16150 if (ResultVT.bitsLT(VecEltVT))
16151 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16152 else
16153 Load = DAG.getBitcast(ResultVT, Load);
16154 }
16155 WorklistRemover DeadNodes(*this);
16156 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16157 SDValue To[] = { Load, Chain };
16158 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16159 // Since we're explicitly calling ReplaceAllUses, add the new node to the
16160 // worklist explicitly as well.
16161 AddToWorklist(Load.getNode());
16162 AddUsersToWorklist(Load.getNode()); // Add users too
16163 // Make sure to revisit this node to clean it up; it will usually be dead.
16164 AddToWorklist(EVE);
16165 ++OpsNarrowed;
16166 return SDValue(EVE, 0);
16167 }
16169 /// Transform a vector binary operation into a scalar binary operation by moving
16170 /// the math/logic after an extract element of a vector.
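/// For example, when one operand is a constant build_vector (so the
/// extract of that side constant-folds away):
///   extractelt (add X, C), IndexC --> add (extractelt X, IndexC), C'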
16171 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
16172 bool LegalOperations) {
16173 SDValue Vec = ExtElt->getOperand(0);
16174 SDValue Index = ExtElt->getOperand(1);
16175 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16176 if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
16177 return SDValue();
16179 // Targets may want to avoid this to prevent an expensive register transfer.
16180 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16181 if (!TLI.shouldScalarizeBinop(Vec))
16182 return SDValue();
16184 // Extracting an element of a vector constant is constant-folded, so this
16185 // transform is just replacing a vector op with a scalar op while moving the
16186 // extract.
16187 SDValue Op0 = Vec.getOperand(0);
16188 SDValue Op1 = Vec.getOperand(1);
16189 if (isAnyConstantBuildVector(Op0, true) ||
16190 isAnyConstantBuildVector(Op1, true)) {
16191 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16192 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16193 SDLoc DL(ExtElt);
16194 EVT VT = ExtElt->getValueType(0);
16195 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16196 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16197 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16198 }
16200 return SDValue();
16201 }
16203 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16204 SDValue VecOp = N->getOperand(0);
16205 SDValue Index = N->getOperand(1);
16206 EVT ScalarVT = N->getValueType(0);
16207 EVT VecVT = VecOp.getValueType();
16208 if (VecOp.isUndef())
16209 return DAG.getUNDEF(ScalarVT);
16211 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16213 // This only really matters if the index is non-constant since other combines
16214 // on the constant elements already work.
16215 SDLoc DL(N);
16216 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16217 Index == VecOp.getOperand(2)) {
16218 SDValue Elt = VecOp.getOperand(1);
16219 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
16220 }
16222 // (vextract (scalar_to_vector val), 0) -> val
16223 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16224 // Check if the result type doesn't match the inserted element type. A
16225 // SCALAR_TO_VECTOR may truncate the inserted element and the
16226 // EXTRACT_VECTOR_ELT may widen the extracted vector.
16227 SDValue InOp = VecOp.getOperand(0);
16228 if (InOp.getValueType() != ScalarVT) {
16229 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16230 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16231 }
16232 return InOp;
16233 }
16235 // extract_vector_elt of out-of-bounds element -> UNDEF
16236 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16237 unsigned NumElts = VecVT.getVectorNumElements();
16238 if (IndexC && IndexC->getAPIntValue().uge(NumElts))
16239 return DAG.getUNDEF(ScalarVT);
16241 // extract_vector_elt (build_vector x, y), 1 -> y
16242 if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
16243 TLI.isTypeLegal(VecVT) &&
16244 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
16245 SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16246 EVT InEltVT = Elt.getValueType();
16248 // Sometimes build_vector's scalar input types do not match result type.
16249 if (ScalarVT == InEltVT)
16250 return Elt;
16252 // TODO: It may be useful to truncate if free if the build_vector implicitly
16253 // converts.
16254 }
16256 // TODO: These transforms should not require the 'hasOneUse' restriction, but
16257 // there are regressions on multiple targets without it. We can end up with a
16258 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16259 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
16260 VecOp.hasOneUse()) {
16261 // The vector index of the LSBs of the source depends on the endianness.
16262 bool IsLE = DAG.getDataLayout().isLittleEndian();
16263 unsigned ExtractIndex = IndexC->getZExtValue();
16264 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16265 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
16266 SDValue BCSrc = VecOp.getOperand(0);
16267 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
16268 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16270 if (LegalTypes && BCSrc.getValueType().isInteger() &&
16271 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16272 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16273 // trunc i64 X to i32
16274 SDValue X = BCSrc.getOperand(0);
16275 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16276 "Extract element and scalar to vector can't change element type "
16277 "from FP to integer.");
16278 unsigned XBitWidth = X.getValueSizeInBits();
16279 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16280 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
16282 // An extract element return value type can be wider than its vector
16283 // operand element type. In that case, the high bits are undefined, so
16284 // it's possible that we may need to extend rather than truncate.
16285 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16286 assert(XBitWidth % VecEltBitWidth == 0 &&
16287 "Scalar bitwidth must be a multiple of vector element bitwidth");
16288 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16289 }
16290 }
16291 }
16293 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16294 return BO;
16296 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16297 // We only perform this optimization before the op legalization phase because
16298 // we may introduce new vector instructions which are not backed by TD
16299 // patterns. For example on AVX, extracting elements from a wide vector
16300 // without using extract_subvector. However, if we can find an underlying
16301 // scalar value, then we can always use that.
16302 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16303 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16304 // Find the new index to extract from.
16305 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16307 // Extracting an undef index is undef.
16308 if (OrigElt == -1)
16309 return DAG.getUNDEF(ScalarVT);
16311 // Select the right vector half to extract from.
16312 SDValue SVInVec;
16313 if (OrigElt < (int)NumElts) {
16314 SVInVec = VecOp.getOperand(0);
16315 } else {
16316 SVInVec = VecOp.getOperand(1);
16317 OrigElt -= NumElts;
16318 }
16320 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16321 SDValue InOp = SVInVec.getOperand(OrigElt);
16322 if (InOp.getValueType() != ScalarVT) {
16323 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16324 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16325 }
16327 return InOp;
16328 }
16330 // FIXME: We should handle recursing on other vector shuffles and
16331 // scalar_to_vector here as well.
16333 if (!LegalOperations ||
16334 // FIXME: Should really be just isOperationLegalOrCustom.
16335 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
16336 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
16337 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16338 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16339 DAG.getConstant(OrigElt, DL, IndexTy));
16340 }
16341 }
16343 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16344 // simplify it based on the (valid) extraction indices.
16345 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16346 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16347 Use->getOperand(0) == VecOp &&
16348 isa<ConstantSDNode>(Use->getOperand(1));
16349 })) {
16350 APInt DemandedElts = APInt::getNullValue(NumElts);
16351 for (SDNode *Use : VecOp->uses()) {
16352 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16353 if (CstElt->getAPIntValue().ult(NumElts))
16354 DemandedElts.setBit(CstElt->getZExtValue());
16355 }
16356 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16357 // We simplified the vector operand of this extract element. If this
16358 // extract is not dead, visit it again so it is folded properly.
16359 if (N->getOpcode() != ISD::DELETED_NODE)
16360 AddToWorklist(N);
16361 return SDValue(N, 0);
16362 }
16363 }
16365 // Everything under here is trying to match an extract of a loaded value.
16366 // If the result of load has to be truncated, then it's not necessarily
16367 // profitable.
16368 bool BCNumEltsChanged = false;
16369 EVT ExtVT = VecVT.getVectorElementType();
16370 EVT LVT = ExtVT;
16371 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16372 return SDValue();
16374 if (VecOp.getOpcode() == ISD::BITCAST) {
16375 // Don't duplicate a load with other uses.
16376 if (!VecOp.hasOneUse())
16377 return SDValue();
16379 EVT BCVT = VecOp.getOperand(0).getValueType();
16380 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16381 return SDValue();
16382 if (NumElts != BCVT.getVectorNumElements())
16383 BCNumEltsChanged = true;
16384 VecOp = VecOp.getOperand(0);
16385 ExtVT = BCVT.getVectorElementType();
16386 }
16388 // extract (vector load $addr), i --> load $addr + i * size
16389 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16390 ISD::isNormalLoad(VecOp.getNode()) &&
16391 !Index->hasPredecessor(VecOp.getNode())) {
16392 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16393 if (VecLoad && !VecLoad->isVolatile())
16394 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16395 }
16397 // Perform only after legalization to ensure build_vector / vector_shuffle
16398 // optimizations have already been done.
16399 if (!LegalOperations || !IndexC)
16400 return SDValue();
16402 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16403 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16404 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16405 int Elt = IndexC->getZExtValue();
16406 LoadSDNode *LN0 = nullptr;
16407 if (ISD::isNormalLoad(VecOp.getNode())) {
16408 LN0 = cast<LoadSDNode>(VecOp);
16409 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16410 VecOp.getOperand(0).getValueType() == ExtVT &&
16411 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
16412 // Don't duplicate a load with other uses.
16413 if (!VecOp.hasOneUse())
16414 return SDValue();
16416 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
16417 }
16418 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
16419 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
16420 // =>
16421 // (load $addr+1*size)
16423 // Don't duplicate a load with other uses.
16424 if (!VecOp.hasOneUse())
16425 return SDValue();
16427 // If the bit convert changed the number of elements, it is unsafe
16428 // to examine the mask.
16429 if (BCNumEltsChanged)
16430 return SDValue();
16432 // Select the input vector, guarding against an out-of-range extract index.
16433 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
16434 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
16436 if (VecOp.getOpcode() == ISD::BITCAST) {
16437 // Don't duplicate a load with other uses.
16438 if (!VecOp.hasOneUse())
16439 return SDValue();
16441 VecOp = VecOp.getOperand(0);
16442 }
16443 if (ISD::isNormalLoad(VecOp.getNode())) {
16444 LN0 = cast<LoadSDNode>(VecOp);
16445 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
16446 Index = DAG.getConstant(Elt, DL, Index.getValueType());
16447 }
16448 }
16450 // Make sure we found a non-volatile load and the extractelement is
16451 // the only use.
16452 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
16453 return SDValue();
16455 // If Idx was -1 above, Elt is going to be -1, so just return undef.
16456 if (Elt == -1)
16457 return DAG.getUNDEF(LVT);
16459 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
16460 }
16462 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
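// Illustrative little-endian example (i8 elements zero-extended to i32):
//   (v2i32 build_vector (zext a), (zext b))
//     --> (v2i32 bitcast (v8i8 build_vector a, 0, 0, 0, b, 0, 0, 0))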
16463 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
16464 // We perform this optimization post type-legalization because
16465 // the type-legalizer often scalarizes integer-promoted vectors.
16466 // Performing this optimization earlier may create bit-casts which
16467 // will be type-legalized to complex code sequences.
16468 // We perform this optimization only before the operation legalizer because we
16469 // may introduce illegal operations.
16470 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
16471 return SDValue();
16473 unsigned NumInScalars = N->getNumOperands();
16474 SDLoc DL(N);
16475 EVT VT = N->getValueType(0);
16477 // Check to see if this is a BUILD_VECTOR of a bunch of values
16478 // which come from any_extend or zero_extend nodes. If so, we can create
16479 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
16480 // optimizations. We do not handle sign-extend because we can't fill the sign
16481 // using shuffles.
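// As an illustrative sketch (hypothetical values, not from the original
// source), on a little-endian target:
//   (v2i32 build_vector (i32 zero_extend a:i16), (i32 zero_extend b:i16))
// becomes
//   (v2i32 bitcast (v4i16 build_vector a, 0, b, 0))
// where the zero filler is used because not all extends were ANY_EXTENDs.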
16482 EVT SourceType = MVT::Other;
16483 bool AllAnyExt = true;
16485 for (unsigned i = 0; i != NumInScalars; ++i) {
16486 SDValue In = N->getOperand(i);
16487 // Ignore undef inputs.
16488 if (In.isUndef()) continue;
16490 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
16491 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
16493 // Abort if the element is not an extension.
16494 if (!ZeroExt && !AnyExt) {
16495 SourceType = MVT::Other;
16496 break;
16497 }
16499 // The input is a ZeroExt or AnyExt. Check the original type.
16500 EVT InTy = In.getOperand(0).getValueType();
16502 // Check that all of the widened source types are the same.
16503 if (SourceType == MVT::Other)
16504 // First time.
16505 SourceType = InTy;
16506 else if (InTy != SourceType) {
16507 // Multiple incoming types. Abort.
16508 SourceType = MVT::Other;
16509 break;
16510 }
16512 // Check if all of the extends are ANY_EXTENDs.
16513 AllAnyExt &= AnyExt;
16514 }
16516 // In order to have valid types, all of the inputs must be extended from the
16517 // same source type and all of the inputs must be any or zero extend.
16518 // Scalar sizes must be a power of two.
16519 EVT OutScalarTy = VT.getScalarType();
16520 bool ValidTypes = SourceType != MVT::Other &&
16521 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
16522 isPowerOf2_32(SourceType.getSizeInBits());
16524 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
16525 // turn into a single shuffle instruction.
16526 if (!ValidTypes)
16527 return SDValue();
16529 bool isLE = DAG.getDataLayout().isLittleEndian();
16530 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
16531 assert(ElemRatio > 1 && "Invalid element size ratio");
16532 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
16533 DAG.getConstant(0, DL, SourceType);
16535 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
16536 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
16538 // Populate the new build_vector
16539 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16540 SDValue Cast = N->getOperand(i);
16541 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
16542 Cast.getOpcode() == ISD::ZERO_EXTEND ||
16543 Cast.isUndef()) && "Invalid cast opcode");
16544 SDValue In;
16545 if (Cast.isUndef())
16546 In = DAG.getUNDEF(SourceType);
16547 else
16548 In = Cast->getOperand(0);
16549 unsigned Index = isLE ? (i * ElemRatio) :
16550 (i * ElemRatio + (ElemRatio - 1));
16552 assert(Index < Ops.size() && "Invalid index");
16553 Ops[Index] = In;
16554 }
16556 // The type of the new BUILD_VECTOR node.
16557 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
16558 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
16559 "Invalid vector size");
16560 // Check if the new vector type is legal.
16561 if (!isTypeLegal(VecVT) ||
16562 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
16563 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
16564 return SDValue();
16566 // Make the new BUILD_VECTOR.
16567 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
16569 // The new BUILD_VECTOR node has the potential to be further optimized.
16570 AddToWorklist(BV.getNode());
16571 // Bitcast to the desired type.
16572 return DAG.getBitcast(VT, BV);
16573 }
16575 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
16576 ArrayRef<int> VectorMask,
16577 SDValue VecIn1, SDValue VecIn2,
16578 unsigned LeftIdx, bool DidSplitVec) {
16579 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16580 SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
16582 EVT VT = N->getValueType(0);
16583 EVT InVT1 = VecIn1.getValueType();
16584 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
16586 unsigned NumElems = VT.getVectorNumElements();
16587 unsigned ShuffleNumElems = NumElems;
16589 // If we artificially split a vector in two already, then the offsets in the
16590 // operands will all be based off of VecIn1, even those in VecIn2.
16591 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
16593 // We can't generate a shuffle node with mismatched input and output types.
16594 // Try to make the types match the type of the output.
16595 if (InVT1 != VT || InVT2 != VT) {
16596 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
16597 // If the output vector length is a multiple of both input lengths,
16598 // we can concatenate them and pad the rest with undefs.
16599 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
16600 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
16601 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
16602 ConcatOps[0] = VecIn1;
16603 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
16604 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16605 VecIn2 = SDValue();
16606 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
16607 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
16608 return SDValue();
16610 if (!VecIn2.getNode()) {
16611 // If we only have one input vector, and it's twice the size of the
16612 // output, split it in two.
16613 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
16614 DAG.getConstant(NumElems, DL, IdxTy));
16615 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
16616 // Since we now have shorter input vectors, adjust the offset of the
16617 // second vector's start.
16618 Vec2Offset = NumElems;
16619 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
16620 // VecIn1 is wider than the output, and we have another, possibly
16621 // smaller input. Pad the smaller input with undefs, shuffle at the
16622 // input vector width, and extract the output.
16623 // The shuffle type is different than VT, so check legality again.
16624 if (LegalOperations &&
16625 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
16626 return SDValue();
16628 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
16629 // lower it back into a BUILD_VECTOR. So if the inserted type is
16630 // illegal, don't even try.
16631 if (InVT1 != InVT2) {
16632 if (!TLI.isTypeLegal(InVT2))
16633 return SDValue();
16634 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
16635 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
16636 }
16637 ShuffleNumElems = NumElems * 2;
16638 } else {
16639 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
16640 // than VecIn1. We can't handle this for now - this case will disappear
16641 // when we start sorting the vectors by type.
16642 return SDValue();
16643 }
16644 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
16645 InVT1.getSizeInBits() == VT.getSizeInBits()) {
16646 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
16647 ConcatOps[0] = VecIn2;
16648 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16649 } else {
16650 // TODO: Support cases where the length mismatch isn't exactly by a
16651 // factor of 2.
16652 // TODO: Move this check upwards, so that if we have bad type
16653 // mismatches, we don't create any DAG nodes.
16654 return SDValue();
16655 }
16656 }
16658 // Initialize mask to undef.
16659 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
16661 // Only need to run up to the number of elements actually used, not the
16662 // total number of elements in the shuffle - if we are shuffling a wider
16663 // vector, the high lanes should be set to undef.
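// Illustrative sketch (hypothetical types, not from the original source):
// with VT = v4i32 and InVT1 = InVT2 = v8i32, ShuffleNumElems is 8, so only
// the first 4 mask entries are filled in below; lanes 4-7 stay undef and the
// final v4i32 result is extracted from lane 0 of the wide shuffle afterwards.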
16664 for (unsigned i = 0; i != NumElems; ++i) {
16665 if (VectorMask[i] <= 0)
16666 continue;
16668 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
16669 if (VectorMask[i] == (int)LeftIdx) {
16670 Mask[i] = ExtIndex;
16671 } else if (VectorMask[i] == (int)LeftIdx + 1) {
16672 Mask[i] = Vec2Offset + ExtIndex;
16673 }
16674 }
16676 // The types of the input vectors may have changed above.
16677 InVT1 = VecIn1.getValueType();
16679 // If we already have a VecIn2, it should have the same type as VecIn1.
16680 // If we don't, get an undef/zero vector of the appropriate type.
16681 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
16682 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
16684 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
16685 if (ShuffleNumElems > NumElems)
16686 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
16688 return Shuffle;
16689 }
16691 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
16692 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
16694 // First, determine where the build vector is not undef.
16695 // TODO: We could extend this to handle zero elements as well as undefs.
16696 int NumBVOps = BV->getNumOperands();
16697 int ZextElt = -1;
16698 for (int i = 0; i != NumBVOps; ++i) {
16699 SDValue Op = BV->getOperand(i);
16700 if (Op.isUndef())
16701 continue;
16702 if (ZextElt == -1)
16703 ZextElt = i;
16704 else
16705 return SDValue();
16706 }
16707 // Bail out if there's no non-undef element.
16708 if (ZextElt == -1)
16709 return SDValue();
16711 // The build vector contains some number of undef elements and exactly
16712 // one other element. That other element must be a zero-extended scalar
16713 // extracted from a vector at a constant index to turn this into a shuffle.
16714 // Also, require that the build vector does not implicitly truncate/extend
16715 // its elements.
16716 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
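// For example (an illustrative sketch, not from the original source), given
//   (v4i32 build_vector undef, (i32 zero_extend (i16 extractelt V:v8i16, 2)),
//                       undef, undef)
// ZextRatio is 2 and we build the v8i16 mask <u,u,2,8,u,u,u,u>: lane 2 of V
// supplies the low half and element 0 of the zero vector the high half.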
16717 EVT VT = BV->getValueType(0);
16718 SDValue Zext = BV->getOperand(ZextElt);
16719 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
16720 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16721 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
16722 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
16723 return SDValue();
16725 // The destination size of the zero-extend must be a multiple of the source
16726 // size, and we must build a vector of the same size as the source of the extract element.
16727 SDValue Extract = Zext.getOperand(0);
16728 unsigned DestSize = Zext.getValueSizeInBits();
16729 unsigned SrcSize = Extract.getValueSizeInBits();
16730 if (DestSize % SrcSize != 0 ||
16731 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
16732 return SDValue();
16734 // Create a shuffle mask that will combine the extracted element with zeros
16735 // and undefs.
16736 int ZextRatio = DestSize / SrcSize;
16737 int NumMaskElts = NumBVOps * ZextRatio;
16738 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
16739 for (int i = 0; i != NumMaskElts; ++i) {
16740 if (i / ZextRatio == ZextElt) {
16741 // The low bits of the (potentially translated) extracted element map to
16742 // the source vector. The high bits map to zero. We will use a zero vector
16743 // as the 2nd source operand of the shuffle, so use the 1st element of
16744 // that vector (mask value is number-of-elements) for the high bits.
16745 if (i % ZextRatio == 0)
16746 ShufMask[i] = Extract.getConstantOperandVal(1);
16747 else
16748 ShufMask[i] = NumMaskElts;
16749 }
16751 // Undef elements of the build vector remain undef because we initialize
16752 // the shuffle mask with -1.
16753 }
16755 // Turn this into a shuffle with zero if that's legal.
16756 EVT VecVT = Extract.getOperand(0).getValueType();
16757 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
16758 return SDValue();
16760 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
16761 // bitcast (shuffle V, ZeroVec, VectorMask)
16762 SDLoc DL(BV);
16763 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
16764 SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
16765 ShufMask);
16766 return DAG.getBitcast(VT, Shuf);
16767 }
16769 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
16770 // operations. If the types of the vectors we're extracting from allow it,
16771 // turn this into a vector_shuffle node.
16772 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
16773 SDLoc DL(N);
16774 EVT VT = N->getValueType(0);
16776 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
16777 if (!isTypeLegal(VT))
16778 return SDValue();
16780 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
16781 return V;
16783 // May only combine to shuffle after legalize if shuffle is legal.
16784 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
16785 return SDValue();
16787 bool UsesZeroVector = false;
16788 unsigned NumElems = N->getNumOperands();
16790 // Record, for each element of the newly built vector, which input vector
16791 // that element comes from. -1 stands for undef, 0 for the zero vector,
16792 // and positive values for the input vectors.
16793 // VectorMask maps each element to its vector number, and VecIn maps vector
16794 // numbers to their initial SDValues.
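// E.g. (an illustrative sketch, hypothetical operands): for
//   build_vector (extractelt A, 0), zero, undef, (extractelt B, 1)
// we end up with VectorMask = [1, 0, -1, 2] and VecIn = [<placeholder>, A, B].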
16796 SmallVector<int, 8> VectorMask(NumElems, -1);
16797 SmallVector<SDValue, 8> VecIn;
16798 VecIn.push_back(SDValue());
16800 for (unsigned i = 0; i != NumElems; ++i) {
16801 SDValue Op = N->getOperand(i);
16803 if (Op.isUndef())
16804 continue;
16806 // See if we can use a blend with a zero vector.
16807 // TODO: Should we generalize this to a blend with an arbitrary constant
16808 // vector?
16809 if (isNullConstant(Op) || isNullFPConstant(Op)) {
16810 UsesZeroVector = true;
16811 VectorMask[i] = 0;
16812 continue;
16813 }
16815 // Not an undef or zero. If the input is something other than an
16816 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
16817 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16818 !isa<ConstantSDNode>(Op.getOperand(1)))
16819 return SDValue();
16820 SDValue ExtractedFromVec = Op.getOperand(0);
16822 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
16823 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
16824 return SDValue();
16826 // All inputs must have the same element type as the output.
16827 if (VT.getVectorElementType() !=
16828 ExtractedFromVec.getValueType().getVectorElementType())
16829 return SDValue();
16831 // Have we seen this input vector before?
16832 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
16833 // a map back from SDValues to numbers isn't worth it.
16834 unsigned Idx = std::distance(
16835 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
16836 if (Idx == VecIn.size())
16837 VecIn.push_back(ExtractedFromVec);
16839 VectorMask[i] = Idx;
16840 }
16842 // If we didn't find at least one input vector, bail out.
16843 if (VecIn.size() < 2)
16844 return SDValue();
16846 // If all the operands of the BUILD_VECTOR extract from the same
16847 // vector, then split that vector efficiently based on the maximum
16848 // vector access index and adjust the VectorMask and
16849 // VecIn accordingly.
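// Illustrative sketch (hypothetical sizes): a v4i32 build_vector whose
// operands extract lanes 0, 3, 9 and 13 of one v16i32 source has
// MaxIndex = 13 and NearestPow2 = 16, so the source is split into two v8i32
// halves and the mask entries are renumbered to point at VecIn1/VecIn2.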
16850 bool DidSplitVec = false;
16851 if (VecIn.size() == 2) {
16852 unsigned MaxIndex = 0;
16853 unsigned NearestPow2 = 0;
16854 SDValue Vec = VecIn.back();
16855 EVT InVT = Vec.getValueType();
16856 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16857 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
16859 for (unsigned i = 0; i < NumElems; i++) {
16860 if (VectorMask[i] <= 0)
16861 continue;
16862 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
16863 IndexVec[i] = Index;
16864 MaxIndex = std::max(MaxIndex, Index);
16865 }
16867 NearestPow2 = PowerOf2Ceil(MaxIndex);
16868 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
16869 NumElems * 2 < NearestPow2) {
16870 unsigned SplitSize = NearestPow2 / 2;
16871 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
16872 InVT.getVectorElementType(), SplitSize);
16873 if (TLI.isTypeLegal(SplitVT)) {
16874 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16875 DAG.getConstant(SplitSize, DL, IdxTy));
16876 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16877 DAG.getConstant(0, DL, IdxTy));
16878 VecIn.pop_back();
16879 VecIn.push_back(VecIn1);
16880 VecIn.push_back(VecIn2);
16881 DidSplitVec = true;
16883 for (unsigned i = 0; i < NumElems; i++) {
16884 if (VectorMask[i] <= 0)
16885 continue;
16886 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
16887 }
16888 }
16889 }
16890 }
16892 // TODO: We want to sort the vectors by descending length, so that adjacent
16893 // pairs have similar length, and the longer vector is always first in the
16894 // pair.
16896 // TODO: Should this fire if some of the input vectors have illegal types (like
16897 // it does now), or should we let legalization run its course first?
16899 // Shuffle phase:
16900 // Take pairs of vectors, and shuffle them so that the result has elements
16901 // from these vectors in the correct places.
16902 // For example, given:
16903 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
16904 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
16905 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
16906 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
16907 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
16908 // We will generate:
16909 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
16910 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
16911 SmallVector<SDValue, 4> Shuffles;
16912 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
16913 unsigned LeftIdx = 2 * In + 1;
16914 SDValue VecLeft = VecIn[LeftIdx];
16915 SDValue VecRight =
16916 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
16918 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
16919 VecRight, LeftIdx, DidSplitVec))
16920 Shuffles.push_back(Shuffle);
16921 else
16922 return SDValue();
16923 }
16925 // If we need the zero vector as an "ingredient" in the blend tree, add it
16926 // to the list of shuffles.
16927 if (UsesZeroVector)
16928 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
16929 : DAG.getConstantFP(0.0, DL, VT));
16931 // If we only have one shuffle, we're done.
16932 if (Shuffles.size() == 1)
16933 return Shuffles[0];
16935 // Update the vector mask to point to the post-shuffle vectors.
16936 for (int &Vec : VectorMask)
16937 if (Vec == 0)
16938 Vec = Shuffles.size() - 1;
16939 else
16940 Vec = (Vec - 1) / 2;
16942 // More than one shuffle. Generate a binary tree of blends, e.g. if from
16943 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
16944 // generate:
16945 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
16946 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
16947 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
16948 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
16949 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
16950 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
16951 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
16953 // Make sure the initial size of the shuffle list is even.
16954 if (Shuffles.size() % 2)
16955 Shuffles.push_back(DAG.getUNDEF(VT));
16957 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
16958 if (CurSize % 2) {
16959 Shuffles[CurSize] = DAG.getUNDEF(VT);
16960 CurSize++;
16961 }
16962 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
16963 int Left = 2 * In;
16964 int Right = 2 * In + 1;
16965 SmallVector<int, 8> Mask(NumElems, -1);
16966 for (unsigned i = 0; i != NumElems; ++i) {
16967 if (VectorMask[i] == Left) {
16968 Mask[i] = i;
16969 VectorMask[i] = In;
16970 } else if (VectorMask[i] == Right) {
16971 Mask[i] = i + NumElems;
16972 VectorMask[i] = In;
16973 }
16974 }
16976 Shuffles[In] =
16977 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
16978 }
16979 }
16980 return Shuffles[0];
16981 }
16983 // Try to turn a build vector of zero extends of extract vector elts into a
16984 // vector zero extend and possibly an extract subvector.
16985 // TODO: Support sign extend?
16986 // TODO: Allow undef elements?
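// For example (an illustrative sketch, not from the original source):
//   (v4i32 build_vector (i32 zext (i16 extractelt V, 4)), ...,
//                       (i32 zext (i16 extractelt V, 7)))
//   --> (v4i32 zero_extend (v4i16 extract_subvector V, 4))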
16987 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
16988 if (LegalOperations)
16989 return SDValue();
16991 EVT VT = N->getValueType(0);
16993 bool FoundZeroExtend = false;
16994 SDValue Op0 = N->getOperand(0);
16995 auto checkElem = [&](SDValue Op) -> int64_t {
16996 unsigned Opc = Op.getOpcode();
16997 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
16998 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
16999 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17000 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17001 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17002 return C->getZExtValue();
17003 return -1;
17004 };
17006 // Make sure the first element matches
17007 // (zext (extract_vector_elt X, C))
17008 int64_t Offset = checkElem(Op0);
17009 if (Offset < 0)
17010 return SDValue();
17012 unsigned NumElems = N->getNumOperands();
17013 SDValue In = Op0.getOperand(0).getOperand(0);
17014 EVT InSVT = In.getValueType().getScalarType();
17015 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17017 // Don't create an illegal input type after type legalization.
17018 if (LegalTypes && !TLI.isTypeLegal(InVT))
17019 return SDValue();
17021 // Ensure all the elements come from the same vector and are adjacent.
17022 for (unsigned i = 1; i != NumElems; ++i) {
17023 if ((Offset + i) != checkElem(N->getOperand(i)))
17024 return SDValue();
17025 }
17027 SDLoc DL(N);
17028 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17029 Op0.getOperand(0).getOperand(1));
17030 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
17031 VT, In);
17032 }
17034 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
17035 EVT VT = N->getValueType(0);
17037 // A vector built entirely of undefs is undef.
17038 if (ISD::allOperandsUndef(N))
17039 return DAG.getUNDEF(VT);
17041 // If this is a splat of a bitcast from another vector, change to a
17042 // concat_vectors.
17043 // For example:
17044 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
17045 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
17047 // If X is a build_vector itself, the concat can become a larger build_vector.
17048 // TODO: Maybe this is useful for non-splat too?
17049 if (!LegalOperations) {
17050 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17051 Splat = peekThroughBitcasts(Splat);
17052 EVT SrcVT = Splat.getValueType();
17053 if (SrcVT.isVector()) {
17054 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
17055 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
17056 SrcVT.getVectorElementType(), NumElts);
17057 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
17058 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
17059 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
17060 NewVT, Ops);
17061 return DAG.getBitcast(VT, Concat);
17062 }
17063 }
17064 }
17065 }
17067 // Check if we can express BUILD_VECTOR via subvector extract.
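// E.g. (an illustrative sketch, hypothetical types):
//   (v2i64 build_vector (extractelt V:v4i64, 2), (extractelt V, 3))
//   --> (v2i64 extract_subvector V, 2)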
17068 if (!LegalTypes && (N->getNumOperands() > 1)) {
17069 SDValue Op0 = N->getOperand(0);
17070 auto checkElem = [&](SDValue Op) -> uint64_t {
17071 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
17072 (Op0.getOperand(0) == Op.getOperand(0)))
17073 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
17074 return CNode->getZExtValue();
17075 return -1;
17076 };
17078 int Offset = checkElem(Op0);
17079 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
17080 if (Offset + i != checkElem(N->getOperand(i))) {
17081 Offset = -1;
17082 break;
17083 }
17084 }
17086 if ((Offset == 0) &&
17087 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
17088 return Op0.getOperand(0);
17089 if ((Offset != -1) &&
17090 ((Offset % N->getValueType(0).getVectorNumElements()) ==
17091 0)) // IDX must be multiple of output size.
17092 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
17093 Op0.getOperand(0), Op0.getOperand(1));
17094 }
17096 if (SDValue V = convertBuildVecZextToZext(N))
17097 return V;
17099 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
17100 return V;
17102 if (SDValue V = reduceBuildVecToShuffle(N))
17103 return V;
17105 return SDValue();
17106 }
17108 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
17109 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17110 EVT OpVT = N->getOperand(0).getValueType();
17112 // If the operands are legal vectors, leave them alone.
17113 if (TLI.isTypeLegal(OpVT))
17114 return SDValue();
17116 SDLoc DL(N);
17117 EVT VT = N->getValueType(0);
17118 SmallVector<SDValue, 8> Ops;
17120 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
17121 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17123 // Keep track of what we encounter.
17124 bool AnyInteger = false;
17125 bool AnyFP = false;
17126 for (const SDValue &Op : N->ops()) {
17127 if (ISD::BITCAST == Op.getOpcode() &&
17128 !Op.getOperand(0).getValueType().isVector())
17129 Ops.push_back(Op.getOperand(0));
17130 else if (ISD::UNDEF == Op.getOpcode())
17131 Ops.push_back(ScalarUndef);
17132 else
17133 return SDValue();
17135 // Note whether we encounter an integer or floating point scalar.
17136 // If it's neither, bail out, it could be something weird like x86mmx.
17137 EVT LastOpVT = Ops.back().getValueType();
17138 if (LastOpVT.isFloatingPoint())
17139 AnyFP = true;
17140 else if (LastOpVT.isInteger())
17141 AnyInteger = true;
17142 else
17143 return SDValue();
17144 }
17146 // If any of the operands is a floating point scalar bitcast to a vector,
17147 // use floating point types throughout, and bitcast everything.
17148 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
17149 if (AnyFP) {
17150 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
17151 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17152 if (AnyInteger) {
17153 for (SDValue &Op : Ops) {
17154 if (Op.getValueType() == SVT)
17155 continue;
17156 if (Op.isUndef())
17157 Op = ScalarUndef;
17158 else
17159 Op = DAG.getBitcast(SVT, Op);
17160 }
17161 }
17162 }
17164 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
17165 VT.getSizeInBits() / SVT.getSizeInBits());
17166 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
17167 }
17169 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
17170 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
17171 // most two distinct vectors the same size as the result, attempt to turn this
17172 // into a legal shuffle.
17173 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
17174 EVT VT = N->getValueType(0);
17175 EVT OpVT = N->getOperand(0).getValueType();
17176 int NumElts = VT.getVectorNumElements();
17177 int NumOpElts = OpVT.getVectorNumElements();
17179 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
17180 SmallVector<int, 8> Mask;
17182 for (SDValue Op : N->ops()) {
17183 Op = peekThroughBitcasts(Op);
17185 // UNDEF nodes convert to UNDEF shuffle mask values.
17186 if (Op.isUndef()) {
17187 Mask.append((unsigned)NumOpElts, -1);
17188 continue;
17189 }
17191 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17192 return SDValue();
17194 // What vector are we extracting the subvector from and at what index?
17195 SDValue ExtVec = Op.getOperand(0);
17197 // We want the EVT of the original extraction to correctly scale the
17198 // extraction index.
17199 EVT ExtVT = ExtVec.getValueType();
17200 ExtVec = peekThroughBitcasts(ExtVec);
17202 // UNDEF nodes convert to UNDEF shuffle mask values.
17203 if (ExtVec.isUndef()) {
17204 Mask.append((unsigned)NumOpElts, -1);
17205 continue;
17206 }
17208 if (!isa<ConstantSDNode>(Op.getOperand(1)))
17209 return SDValue();
17210 int ExtIdx = Op.getConstantOperandVal(1);
17212 // Ensure that we are extracting a subvector from a vector the same
17213 // size as the result.
17214 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
17215 return SDValue();
17217 // Scale the subvector index to account for any bitcast.
17218 int NumExtElts = ExtVT.getVectorNumElements();
17219 if (0 == (NumExtElts % NumElts))
17220 ExtIdx /= (NumExtElts / NumElts);
17221 else if (0 == (NumElts % NumExtElts))
17222 ExtIdx *= (NumElts / NumExtElts);
17223 else
17224 return SDValue();
17226 // At most we can reference 2 inputs in the final shuffle.
17227 if (SV0.isUndef() || SV0 == ExtVec) {
17228 SV0 = ExtVec;
17229 for (int i = 0; i != NumOpElts; ++i)
17230 Mask.push_back(i + ExtIdx);
17231 } else if (SV1.isUndef() || SV1 == ExtVec) {
17232 SV1 = ExtVec;
17233 for (int i = 0; i != NumOpElts; ++i)
17234 Mask.push_back(i + ExtIdx + NumElts);
17235 } else {
17236 return SDValue();
17237 }
17238 }
17240 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
17241 return SDValue();
17243 return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
17244 DAG.getBitcast(VT, SV1), Mask);
17245 }
17247 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
17248 // If we only have one input vector, we don't need to do any concatenation.
17249 if (N->getNumOperands() == 1)
17250 return N->getOperand(0);
17252 // Check if all of the operands are undefs.
17253 EVT VT = N->getValueType(0);
17254 if (ISD::allOperandsUndef(N))
17255 return DAG.getUNDEF(VT);
17257 // Optimize concat_vectors where all but the first of the vectors are undef.
17258 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
17259 return Op.isUndef();
17260 })) {
17261 SDValue In = N->getOperand(0);
17262 assert(In.getValueType().isVector() && "Must concat vectors");
17264 SDValue Scalar = peekThroughOneUseBitcasts(In);
17266 // concat_vectors(scalar_to_vector(scalar), undef) ->
17267 // scalar_to_vector(scalar)
17268 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17269 Scalar.hasOneUse()) {
17270 EVT SVT = Scalar.getValueType().getVectorElementType();
17271 if (SVT == Scalar.getOperand(0).getValueType())
17272 Scalar = Scalar.getOperand(0);
17273 }
17275 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
17276 if (!Scalar.getValueType().isVector()) {
17277 // If the bitcast type isn't legal, it might be a trunc of a legal type;
17278 // look through the trunc so we can still do the transform:
17279 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
17280 if (Scalar->getOpcode() == ISD::TRUNCATE &&
17281 !TLI.isTypeLegal(Scalar.getValueType()) &&
17282 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
17283 Scalar = Scalar->getOperand(0);
17285 EVT SclTy = Scalar.getValueType();
17287 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17288 return SDValue();
17290 // Bail out if the vector size is not a multiple of the scalar size.
17291 if (VT.getSizeInBits() % SclTy.getSizeInBits())
17292 return SDValue();
17294 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17295 if (VNTNumElms < 2)
17296 return SDValue();
17298 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17299 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17300 return SDValue();
17302 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17303 return DAG.getBitcast(VT, Res);
17304 }
17305 }
17307 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17308 // We have already tested above for an UNDEF only concatenation.
17309 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17310 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17311 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17312 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17313 };
17314 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17315 SmallVector<SDValue, 8> Opnds;
17316 EVT SVT = VT.getScalarType();
17318 EVT MinVT = SVT;
17319 if (!SVT.isFloatingPoint()) {
17320 // If the BUILD_VECTORs are built from integers, they may have different
17321 // operand types. Get the smallest type and truncate all operands to it.
17322 bool FoundMinVT = false;
17323 for (const SDValue &Op : N->ops())
17324 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17325 EVT OpSVT = Op.getOperand(0).getValueType();
17326 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17327 FoundMinVT = true;
17328 }
17329 assert(FoundMinVT && "Concat vector type mismatch");
17330 }
17332 for (const SDValue &Op : N->ops()) {
17333 EVT OpVT = Op.getValueType();
17334 unsigned NumElts = OpVT.getVectorNumElements();
17336 if (ISD::UNDEF == Op.getOpcode())
17337 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17339 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17340 if (SVT.isFloatingPoint()) {
17341 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17342 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17343 } else {
17344 for (unsigned i = 0; i != NumElts; ++i)
17345 Opnds.push_back(
17346 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17347 }
17348 }
17349 }
17351 assert(VT.getVectorNumElements() == Opnds.size() &&
17352 "Concat vector type mismatch");
17353 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17354 }
17356 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17357 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
17358 return V;
17360 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17361 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17362 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
17363 return V;
17365 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
17366 // nodes often generate nop CONCAT_VECTOR nodes.
17367 // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
17368 // place the incoming vectors at the exact same location.
17369 SDValue SingleSource = SDValue();
17370 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17372 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17373 SDValue Op = N->getOperand(i);
17375 if (Op.isUndef())
17376 continue;
17378 // Check if this is the identity extract:
17379 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17380 return SDValue();
17382 // Find the single incoming vector for the extract_subvector.
17383 if (SingleSource.getNode()) {
17384 if (Op.getOperand(0) != SingleSource)
17385 return SDValue();
17386 } else {
17387 SingleSource = Op.getOperand(0);
17389 // Check the source type is the same as the type of the result.
17390 // If not, this concat may extend the vector, so we cannot
17391 // optimize it away.
17392 if (SingleSource.getValueType() != N->getValueType(0))
17393 return SDValue();
17394 }
17396 unsigned IdentityIndex = i * PartNumElem;
17397 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17398 // The extract index must be constant.
17399 if (!CS)
17400 return SDValue();
17402 // Check that we are reading from the identity index.
17403 if (CS->getZExtValue() != IdentityIndex)
17404 return SDValue();
17405 }
17407 if (SingleSource.getNode())
17408 return SingleSource;
17410 return SDValue();
17411 }
17413 /// If we are extracting a subvector produced by a wide binary operator try
17414 /// to use a narrow binary operator and/or avoid concatenation and extraction.
17415 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
17416 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
17417 // some of these bailouts with other transforms.
17419 // The extract index must be a constant, so we can map it to a concat operand.
17420 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17421 if (!ExtractIndexC)
17422 return SDValue();
17424 // We are looking for an optionally bitcasted wide vector binary operator
17425 // feeding an extract subvector.
17426 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
17427 if (!ISD::isBinaryOp(BinOp.getNode()))
17428 return SDValue();
17430 // The binop must be a vector type, so we can extract some fraction of it.
17431 EVT WideBVT = BinOp.getValueType();
17432 if (!WideBVT.isVector())
17433 return SDValue();
17435 EVT VT = Extract->getValueType(0);
17436 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
17437 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
17438 "Extract index is not a multiple of the vector length.");
17440 // Bail out if this is not a proper multiple width extraction.
17441 unsigned WideWidth = WideBVT.getSizeInBits();
17442 unsigned NarrowWidth = VT.getSizeInBits();
17443 if (WideWidth % NarrowWidth != 0)
17444 return SDValue();
17446 // Bail out if we are extracting a fraction of a single operation. This can
17447 // occur because we potentially looked through a bitcast of the binop.
17448 unsigned NarrowingRatio = WideWidth / NarrowWidth;
17449 unsigned WideNumElts = WideBVT.getVectorNumElements();
17450 if (WideNumElts % NarrowingRatio != 0)
17451 return SDValue();
17453 // Bail out if the target does not support a narrower version of the binop.
17454 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
17455 WideNumElts / NarrowingRatio);
17456 unsigned BOpcode = BinOp.getOpcode();
17457 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17458 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
17459 return SDValue();
17461 // If extraction is cheap, we don't need to look at the binop operands
17462 // for concat ops. The narrow binop alone makes this transform profitable.
17463 // We can't just reuse the original extract index operand because we may have
17464 // bitcasted.
17465 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
17466 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
17467 EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
17468 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
17469 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
17470 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
17471 SDLoc DL(Extract);
17472 SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17473 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17474 BinOp.getOperand(0), NewExtIndex);
17475 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17476 BinOp.getOperand(1), NewExtIndex);
17477 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
17478 BinOp.getNode()->getFlags());
17479 return DAG.getBitcast(VT, NarrowBinOp);
17480 }
17482 // Only handle the case where we are doubling and then halving. A larger ratio
17483 // may require more than two narrow binops to replace the wide binop.
17484 if (NarrowingRatio != 2)
17485 return SDValue();
17487 // TODO: The motivating case for this transform is an x86 AVX1 target. That
17488 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
17489 // flavors, but no other 256-bit integer support. This could be extended to
17490 // handle any binop, but that may require fixing/adding other folds to avoid
17491 // codegen regressions.
17492 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
17493 return SDValue();
17495 // We need at least one concatenation operation of a binop operand to make
17496 // this transform worthwhile. The concat must double the input vector sizes.
17497 // TODO: Should we also handle INSERT_SUBVECTOR patterns?
17498 SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
17499 SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
17500 bool ConcatL =
17501 LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
17502 bool ConcatR =
17503 RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
17504 if (ConcatL || ConcatR) {
17505 // If a binop operand was not the result of a concat, we must extract a
17506 // half-sized operand for our new narrow binop:
17507 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
17508 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
17509 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
17510 SDLoc DL(Extract);
17511 SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17512 SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
17513 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17514 BinOp.getOperand(0), IndexC);
17516 SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
17517 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17518 BinOp.getOperand(1), IndexC);
17520 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
17521 return DAG.getBitcast(VT, NarrowBinOp);
17522 }
17524 return SDValue();
17525 }
17527 /// If we are extracting a subvector from a wide vector load, convert to a
17528 /// narrow load to eliminate the extraction:
17529 /// (extract_subvector (load wide vector)) --> (load narrow vector)
17530 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
17531 // TODO: Add support for big-endian. The offset calculation must be adjusted.
17532 if (DAG.getDataLayout().isBigEndian())
17533 return SDValue();
17535 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
17536 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17537 if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
17538 return SDValue();
17540 // Allow targets to opt-out.
17541 EVT VT = Extract->getValueType(0);
17542 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17543 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
17544 return SDValue();
17546 // The narrow load will be offset from the base address of the old load if
17547 // we are extracting from something besides index 0 (little-endian).
17548 SDLoc DL(Extract);
17549 SDValue BaseAddr = Ld->getOperand(1);
17550 unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
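// E.g. (an illustrative sketch): extracting a v2f32 at index 2 from a v8f32
// load gives Offset = 2 * 4 = 8 bytes, so we load a v2f32 from $addr + 8.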
17552 // TODO: Use "BaseIndexOffset" to make this more effective.
17553 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
17554 MachineFunction &MF = DAG.getMachineFunction();
17555 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
17556 VT.getStoreSize());
17557 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
17558 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
17559 return NewLd;
17560 }
17562 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
17563 EVT NVT = N->getValueType(0);
17564 SDValue V = N->getOperand(0);
17566 // Extract from UNDEF is UNDEF.
17567 if (V.isUndef())
17568 return DAG.getUNDEF(NVT);
17570 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
17571 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
17572 return NarrowLoad;
17574 // Combine an extract of an extract into a single extract_subvector.
17575 // ext (ext X, C), 0 --> ext X, C
17576 if (isNullConstant(N->getOperand(1)) &&
17577 V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse() &&
17578 isa<ConstantSDNode>(V.getOperand(1))) {
17579 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
17580 V.getConstantOperandVal(1)) &&
17581 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
17582 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
17583 V.getOperand(1));
17584 }
17585 }
17587 // Combine:
17588 // (extract_subvec (concat V1, V2, ...), i)
17589 // Into:
17590 // Vi if possible
17591 // Only operand 0 is checked as 'concat' assumes all inputs of the same
17592 // type.
17593 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
17594 isa<ConstantSDNode>(N->getOperand(1)) &&
17595 V.getOperand(0).getValueType() == NVT) {
17596 unsigned Idx = N->getConstantOperandVal(1);
17597 unsigned NumElems = NVT.getVectorNumElements();
17598 assert((Idx % NumElems) == 0 &&
17599 "IDX in concat is not a multiple of the result vector length.");
17600 return V->getOperand(Idx / NumElems);
17601 }
17603 V = peekThroughBitcasts(V);
17605 // If the input is a build vector, try to make a smaller build vector.
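// E.g. (an illustrative sketch): extracting a v2i32 at index 2 from
// (v4i32 build_vector a, b, c, d) yields (v2i32 build_vector c, d).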
17606 if (V.getOpcode() == ISD::BUILD_VECTOR) {
17607 if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
17608 EVT InVT = V.getValueType();
17609 unsigned ExtractSize = NVT.getSizeInBits();
17610 unsigned EltSize = InVT.getScalarSizeInBits();
17611 // Only do this if we won't split any elements.
17612 if (ExtractSize % EltSize == 0) {
17613 unsigned NumElems = ExtractSize / EltSize;
17614 EVT EltVT = InVT.getVectorElementType();
17615 EVT ExtractVT = NumElems == 1 ? EltVT
17616 : EVT::getVectorVT(*DAG.getContext(),
17617 EltVT, NumElems);
17618 if ((Level < AfterLegalizeDAG ||
17619 (NumElems == 1 ||
17620 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
17621 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
17622 unsigned IdxVal = Idx->getZExtValue();
17623 IdxVal *= NVT.getScalarSizeInBits();
17624 IdxVal /= EltSize;
17626 if (NumElems == 1) {
17627 SDValue Src = V->getOperand(IdxVal);
17628 if (EltVT != Src.getValueType())
17629 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
17630 return DAG.getBitcast(NVT, Src);
17631 }
17633 // Extract the pieces from the original build_vector.
17634 SDValue BuildVec = DAG.getBuildVector(
17635 ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
17636 return DAG.getBitcast(NVT, BuildVec);
17637 }
17638 }
17639 }
17640 }
17642 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
17643 // Handle only the simple case where the vector being inserted and the
17644 // vector being extracted are of the same size.
17645 EVT SmallVT = V.getOperand(1).getValueType();
17646 if (!NVT.bitsEq(SmallVT))
17647 return SDValue();
17649 // Only handle cases where both indexes are constants.
17650 auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
17651 auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
17653 if (InsIdx && ExtIdx) {
17654 // Combine:
17655 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
17656 // Into:
17657 // indices are equal or bit offsets are equal => V1
17658 // otherwise => (extract_subvec V1, ExtIdx)
17659 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
17660 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
17661 return DAG.getBitcast(NVT, V.getOperand(1));
17662 return DAG.getNode(
17663 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
17664 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
17665 N->getOperand(1));
17666 }
17667 }
17669 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
17670 return NarrowBOp;
17672 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17673 return SDValue(N, 0);
17675 return SDValue();
17676 }
17678 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
17679 /// followed by concatenation. Narrow vector ops may have better performance
17680 /// than wide ops, and this can unlock further narrowing of other vector ops.
17681 /// Targets can invert this transform later if it is not profitable.
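/// E.g. (an illustrative sketch, not from the original source): a v4i32
/// shuffle<0,5,1,4> of (concat X:v2i32, undef) and (concat Y:v2i32, undef)
/// becomes (concat (shuffle X, Y, <0,3>), (shuffle X, Y, <1,2>)).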
17682 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
17683 SelectionDAG &DAG) {
17684 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
17685 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
17686 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
17687 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
17688 return SDValue();
17690 // Split the wide shuffle mask into halves. Any mask element that is accessing
17691 // operand 1 is offset down to account for narrowing of the vectors.
17692 ArrayRef<int> Mask = Shuf->getMask();
17693 EVT VT = Shuf->getValueType(0);
17694 unsigned NumElts = VT.getVectorNumElements();
17695 unsigned HalfNumElts = NumElts / 2;
17696 SmallVector<int, 16> Mask0(HalfNumElts, -1);
17697 SmallVector<int, 16> Mask1(HalfNumElts, -1);
17698 for (unsigned i = 0; i != NumElts; ++i) {
17699 if (Mask[i] == -1)
17700 continue;
17701 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
17702 if (i < HalfNumElts)
17703 Mask0[i] = M;
17704 else
17705 Mask1[i - HalfNumElts] = M;
17706 }
17708 // Ask the target if this is a valid transform.
17709 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17710 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
17711 HalfNumElts);
17712 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
17713 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
17714 return SDValue();
17716 // shuffle (concat X, undef), (concat Y, undef), Mask -->
17717 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
17718 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
17719 SDLoc DL(Shuf);
17720 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
17721 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
17722 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
17723 }
17725 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
17726 // turn a shuffle of a single concat into a simpler shuffle followed by a concat.
17727 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
17728 EVT VT = N->getValueType(0);
17729 unsigned NumElts = VT.getVectorNumElements();
17731 SDValue N0 = N->getOperand(0);
17732 SDValue N1 = N->getOperand(1);
17733 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17734 ArrayRef<int> Mask = SVN->getMask();
17736 SmallVector<SDValue, 4> Ops;
17737 EVT ConcatVT = N0.getOperand(0).getValueType();
17738 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
17739 unsigned NumConcats = NumElts / NumElemsPerConcat;
17741 auto IsUndefMaskElt = [](int i) { return i == -1; };
17743 // Special case: shuffle(concat(A,B)) can be more efficiently represented
17744 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
17745 // half vector elements.
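// E.g. (an illustrative sketch): shuffle<1,2,u,u> (concat A:v2i32, B:v2i32),
// undef --> concat (shuffle<1,2> A, B), (v2i32 undef).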
17746 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
17747 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
17748 IsUndefMaskElt)) {
17749 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
17750 N0.getOperand(1),
17751 Mask.slice(0, NumElemsPerConcat));
17752 N1 = DAG.getUNDEF(ConcatVT);
17753 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
17754 }
17756 // Look at every vector that's inserted. We're looking for exact
17757 // subvector-sized copies from a concatenated vector.
17758 for (unsigned I = 0; I != NumConcats; ++I) {
17759 unsigned Begin = I * NumElemsPerConcat;
17760 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
17762 // Make sure we're dealing with a copy.
17763 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
17764 Ops.push_back(DAG.getUNDEF(ConcatVT));
17765 continue;
17766 }
17768 int OpIdx = -1;
17769 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
17770 if (IsUndefMaskElt(SubMask[i]))
17771 continue;
17772 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
17773 return SDValue();
17774 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
17775 if (0 <= OpIdx && EltOpIdx != OpIdx)
17776 return SDValue();
17777 OpIdx = EltOpIdx;
17778 }
17779 assert(0 <= OpIdx && "Unknown concat_vectors op");
17781 if (OpIdx < (int)N0.getNumOperands())
17782 Ops.push_back(N0.getOperand(OpIdx));
17783 else
17784 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
17785 }
17787 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17788 }
17790 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17791 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17793 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
17794 // a simplification in some sense, but it isn't appropriate in general: some
17795 // BUILD_VECTORs are substantially cheaper than others. The general case
17796 // of a BUILD_VECTOR requires inserting each element individually (or
17797 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
17798 // all constants is a single constant pool load. A BUILD_VECTOR where each
17799 // element is identical is a splat. A BUILD_VECTOR where most of the operands
17800 // are undef lowers to a small number of element insertions.
17802 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
17803 // We don't fold shuffles where one side is a non-zero constant, and we don't
17804 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
17805 // non-constant operands. This seems to work out reasonably well in practice.
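// E.g. (an illustrative sketch): shuffle<0,5,1,7> (build_vector a, b, c, d),
// (build_vector e, f, g, h) --> build_vector a, f, b, h.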
17806 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
17807 SelectionDAG &DAG,
17808 const TargetLowering &TLI) {
17809 EVT VT = SVN->getValueType(0);
17810 unsigned NumElts = VT.getVectorNumElements();
17811 SDValue N0 = SVN->getOperand(0);
17812 SDValue N1 = SVN->getOperand(1);
17814 if (!N0->hasOneUse())
17815 return SDValue();
17817 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
17818 // discussed above.
17819 if (!N1.isUndef()) {
17820 if (!N1->hasOneUse())
17821 return SDValue();
17823 bool N0AnyConst = isAnyConstantBuildVector(N0);
17824 bool N1AnyConst = isAnyConstantBuildVector(N1);
17825 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
17826 return SDValue();
17827 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
17828 return SDValue();
17829 }
17831 // If both inputs are splats of the same value then we can safely merge this
17832 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
17833 bool IsSplat = false;
17834 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
17835 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
17836 if (BV0 && BV1)
17837 if (SDValue Splat0 = BV0->getSplatValue())
17838 IsSplat = (Splat0 == BV1->getSplatValue());
17840 SmallVector<SDValue, 8> Ops;
17841 SmallSet<SDValue, 16> DuplicateOps;
17842 for (int M : SVN->getMask()) {
17843 SDValue Op = DAG.getUNDEF(VT.getScalarType());
17844 if (M >= 0) {
17845 int Idx = M < (int)NumElts ? M : M - NumElts;
17846 SDValue &S = (M < (int)NumElts ? N0 : N1);
17847 if (S.getOpcode() == ISD::BUILD_VECTOR) {
17848 Op = S.getOperand(Idx);
17849 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17850 SDValue Op0 = S.getOperand(0);
17851 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
17852 } else {
17853 // Operand can't be combined - bail out.
17854 return SDValue();
17855 }
17856 }
17858 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
17859 // generating a splat; semantically, this is fine, but it's likely to
17860 // generate low-quality code if the target can't reconstruct an appropriate
17861 // shuffle.
17862 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
17863 if (!IsSplat && !DuplicateOps.insert(Op).second)
17864 return SDValue();
17866 Ops.push_back(Op);
17867 }
17869 // BUILD_VECTOR requires all inputs to be of the same type; find the
17870 // maximum type and extend them all.
17871 EVT SVT = VT.getScalarType();
17872 if (SVT.isInteger())
17873 for (SDValue &Op : Ops)
17874 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
17875 if (SVT != VT.getScalarType())
17876 for (SDValue &Op : Ops)
17877 Op = TLI.isZExtFree(Op.getValueType(), SVT)
17878 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
17879 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
17880 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
17881 }
17883 // Match shuffles that can be converted to ANY_EXTEND_VECTOR_INREG.
17884 // This is often generated during legalization.
17885 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_extend_vector_inreg(v4i32 src))
17886 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
17887 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
17888 SelectionDAG &DAG,
17889 const TargetLowering &TLI,
17890 bool LegalOperations) {
17891 EVT VT = SVN->getValueType(0);
17892 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17894 // TODO Add support for big-endian when we have a test case.
17895 if (!VT.isInteger() || IsBigEndian)
17896 return SDValue();
17898 unsigned NumElts = VT.getVectorNumElements();
17899 unsigned EltSizeInBits = VT.getScalarSizeInBits();
17900 ArrayRef<int> Mask = SVN->getMask();
17901 SDValue N0 = SVN->getOperand(0);
17903 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
17904 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
17905 for (unsigned i = 0; i != NumElts; ++i) {
17906 if (Mask[i] < 0)
17907 continue;
17908 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
17909 continue;
17910 return false;
17911 }
17912 return true;
17913 };
17915 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
17916 // power-of-2 extensions as they are the most likely.
17917 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
17918 // Check for non power of 2 vector sizes
17919 if (NumElts % Scale != 0)
17920 continue;
17921 if (!isAnyExtend(Scale))
17922 continue;
17924 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
17925 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
17926 // Never create an illegal type. Only create unsupported operations if we
17927 // are pre-legalization.
17928 if (TLI.isTypeLegal(OutVT))
17929 if (!LegalOperations ||
17930 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
17931 return DAG.getBitcast(VT,
17932 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
17933 SDLoc(SVN), OutVT, N0));
17936 return SDValue();
17939 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
17940 // each source element of a large type into the lowest elements of a smaller
17941 // destination type. This is often generated during legalization.
17942 // If the source node itself was a '*_extend_vector_inreg' node then we
17943 // should be able to remove it.
17944 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
17945 SelectionDAG &DAG) {
17946 EVT VT = SVN->getValueType(0);
17947 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17949 // TODO Add support for big-endian when we have a test case.
17950 if (!VT.isInteger() || IsBigEndian)
17951 return SDValue();
17953 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
17955 unsigned Opcode = N0.getOpcode();
17956 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
17957 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
17958 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
17959 return SDValue();
17961 SDValue N00 = N0.getOperand(0);
17962 ArrayRef<int> Mask = SVN->getMask();
17963 unsigned NumElts = VT.getVectorNumElements();
17964 unsigned EltSizeInBits = VT.getScalarSizeInBits();
17965 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
17966 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
17968 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
17969 return SDValue();
17970 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
17972 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
17973 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
17974 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
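// i.e. for a truncation scale S, the defined lanes must satisfy
// Mask[i] == i*S while i*S is still in range; the remaining tail lanes
// are undef.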
17975 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
17976 for (unsigned i = 0; i != NumElts; ++i) {
17977 if (Mask[i] < 0)
17978 continue;
17979 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
17980 continue;
17981 return false;
17983 return true;
17986 // At the moment we just handle the case where we've truncated back to the
17987 // same size as before the extension.
17988 // TODO: handle more extension/truncation cases as cases arise.
17989 if (EltSizeInBits != ExtSrcSizeInBits)
17990 return SDValue();
17992 // We can remove *extend_vector_inreg only if the truncation happens at
17993 // the same scale as the extension.
17994 if (isTruncate(ExtScale))
17995 return DAG.getBitcast(VT, N00);
17997 return SDValue();
18000 // Combine shuffles of splat-shuffles of the form:
18001 // shuffle (shuffle V, undef, splat-mask), undef, M
18002 // If splat-mask contains undef elements, we need to be careful about
18003 // introducing undef's in the folded mask which are not the result of composing
18004 // the masks of the shuffles.
18005 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
18006 SelectionDAG &DAG) {
18007 if (!Shuf->getOperand(1).isUndef())
18008 return SDValue();
18009 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18010 if (!Splat || !Splat->isSplat())
18011 return SDValue();
18013 ArrayRef<int> ShufMask = Shuf->getMask();
18014 ArrayRef<int> SplatMask = Splat->getMask();
18015 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
18017 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
18018 // every undef mask element in the splat-shuffle has a corresponding undef
18019 // element in the user-shuffle's mask or if the composition of mask elements
18020 // would result in undef.
18021 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
18022 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
18023 // In this case it is not legal to simplify to the splat-shuffle because we
18024 // may be exposing to the users of the shuffle an undef element at index 1
18025 // which was not there before the combine.
18026 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
18027 // In this case the composition of masks yields SplatMask, so it's ok to
18028 // simplify to the splat-shuffle.
18029 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
18030 // In this case the composed mask includes all undef elements of SplatMask
18031 // and in addition sets element zero to undef. It is safe to simplify to
18032 // the splat-shuffle.
18033 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
18034 ArrayRef<int> SplatMask) {
18035 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
18036 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
18037 SplatMask[UserMask[i]] != -1)
18038 return false;
18039 return true;
18041 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
18042 return Shuf->getOperand(0);
18044 // Create a new shuffle with a mask that is composed of the two shuffles'
18045 // masks.
18046 SmallVector<int, 32> NewMask;
18047 for (int Idx : ShufMask)
18048 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
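// e.g. reusing the first example above: UserMask=[0,2,u,u] composed with
// SplatMask=[2,u,2,u] gives NewMask=[2,2,u,u], which shuffles the splat's
// own source operands directly.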
18050 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
18051 Splat->getOperand(0), Splat->getOperand(1),
18052 NewMask);
18055 /// If the shuffle mask is taking exactly one element from the first vector
18056 /// operand and passing through all other elements from the second vector
18057 /// operand, return the index of the mask element that is choosing an element
18058 /// from the first operand. Otherwise, return -1.
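/// For example (illustrative, 4 elements): Mask = <4,5,2,7> returns 2, since
/// only lane 2 chooses from operand 0 (2 < 4) while lanes 0, 1 and 3 pass
/// operand 1 through unmoved (4 == 0+4, 5 == 1+4, 7 == 3+4).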
18059 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
18060 int MaskSize = Mask.size();
18061 int EltFromOp0 = -1;
18062 // TODO: This does not match if there are undef elements in the shuffle mask.
18063 // Should we ignore undefs in the shuffle mask instead? The trade-off is
18064 // removing an instruction (a shuffle), but losing the knowledge that some
18065 // vector lanes are not needed.
18066 for (int i = 0; i != MaskSize; ++i) {
18067 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
18068 // We're looking for a shuffle of exactly one element from operand 0.
18069 if (EltFromOp0 != -1)
18070 return -1;
18071 EltFromOp0 = i;
18072 } else if (Mask[i] != i + MaskSize) {
18073 // Nothing from operand 1 can change lanes.
18074 return -1;
18077 return EltFromOp0;
18080 /// If a shuffle inserts exactly one element from a source vector operand into
18081 /// another vector operand and we can access the specified element as a scalar,
18082 /// then we can eliminate the shuffle.
18083 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
18084 SelectionDAG &DAG) {
18085 // First, check if we are taking one element of a vector and shuffling that
18086 // element into another vector.
18087 ArrayRef<int> Mask = Shuf->getMask();
18088 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
18089 SDValue Op0 = Shuf->getOperand(0);
18090 SDValue Op1 = Shuf->getOperand(1);
18091 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
18092 if (ShufOp0Index == -1) {
18093 // Commute mask and check again.
18094 ShuffleVectorSDNode::commuteMask(CommutedMask);
18095 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
18096 if (ShufOp0Index == -1)
18097 return SDValue();
18098 // Commute operands to match the commuted shuffle mask.
18099 std::swap(Op0, Op1);
18100 Mask = CommutedMask;
18103 // The shuffle inserts exactly one element from operand 0 into operand 1.
18104 // Now see if we can access that element as a scalar via a real insert element
18105 // instruction.
18106 // TODO: We can try harder to locate the element as a scalar. Examples: it
18107 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
18108 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
18109 "Shuffle mask value must be from operand 0");
18110 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
18111 return SDValue();
18113 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
18114 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
18115 return SDValue();
18117 // There's an existing insertelement with constant insertion index, so we
18118 // don't need to check the legality/profitability of a replacement operation
18119 // that differs at most in the constant value. The target should be able to
18120 // lower any of those in a similar way. If not, legalization will expand this
18121 // to a scalar-to-vector plus shuffle.
18123 // Note that the shuffle may move the scalar from the position that the insert
18124 // element used. Therefore, our new insert element occurs at the shuffle's
18125 // mask index value, not the insert's index value.
18126 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
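// e.g. (illustrative, v4 types):
// shuffle (insertelt v1, x, 2), v2, <2,5,6,7> --> insertelt v2, x, 0
// because lane 0 selects the scalar inserted at index 2 of v1 and lanes 1-3
// pass v2 through unchanged.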
18127 SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
18128 Op0.getOperand(2).getValueType());
18129 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
18130 Op1, Op0.getOperand(1), NewInsIndex);
18133 /// If we have a unary shuffle of a shuffle, see if it can be folded away
18134 /// completely. This has the potential to lose undef knowledge because the first
18135 /// shuffle may not have an undef mask element where the second one does. So
18136 /// only call this after doing simplifications based on demanded elements.
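/// For example (illustrative): with inner Mask0 = <1,1,3,3> and outer mask
/// <1,0,3,2>, every outer lane selects an inner lane that holds the same
/// source element, so the outer shuffle is a no-op.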
18137 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
18138 // shuf (shuf0 X, Y, Mask0), undef, Mask
18139 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18140 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
18141 return SDValue();
18143 ArrayRef<int> Mask = Shuf->getMask();
18144 ArrayRef<int> Mask0 = Shuf0->getMask();
18145 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
18146 // Ignore undef elements.
18147 if (Mask[i] == -1)
18148 continue;
18149 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
18151 // Is the element of the shuffle operand chosen by this shuffle the same as
18152 // the element chosen by the shuffle operand itself?
18153 if (Mask0[Mask[i]] != Mask0[i])
18154 return SDValue();
18156 // Every element of this shuffle is identical to the result of the previous
18157 // shuffle, so we can replace this value.
18158 return Shuf->getOperand(0);
18161 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
18162 EVT VT = N->getValueType(0);
18163 unsigned NumElts = VT.getVectorNumElements();
18165 SDValue N0 = N->getOperand(0);
18166 SDValue N1 = N->getOperand(1);
18168 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18170 // Canonicalize shuffle undef, undef -> undef
18171 if (N0.isUndef() && N1.isUndef())
18172 return DAG.getUNDEF(VT);
18174 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18176 // Canonicalize shuffle v, v -> v, undef
18177 if (N0 == N1) {
18178 SmallVector<int, 8> NewMask;
18179 for (unsigned i = 0; i != NumElts; ++i) {
18180 int Idx = SVN->getMaskElt(i);
18181 if (Idx >= (int)NumElts) Idx -= NumElts;
18182 NewMask.push_back(Idx);
18184 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18187 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
18188 if (N0.isUndef())
18189 return DAG.getCommutedVectorShuffle(*SVN);
18191 // Remove references to rhs if it is undef
18192 if (N1.isUndef()) {
18193 bool Changed = false;
18194 SmallVector<int, 8> NewMask;
18195 for (unsigned i = 0; i != NumElts; ++i) {
18196 int Idx = SVN->getMaskElt(i);
18197 if (Idx >= (int)NumElts) {
18198 Idx = -1;
18199 Changed = true;
18201 NewMask.push_back(Idx);
18203 if (Changed)
18204 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18207 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18208 return InsElt;
18210 // A shuffle of a single vector that is a splatted value can always be folded.
18211 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18212 return V;
18214 // If it is a splat, check if the argument vector is another splat or a
18215 // build_vector.
18216 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
18217 int SplatIndex = SVN->getSplatIndex();
18218 if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18219 ISD::isBinaryOp(N0.getNode())) {
18220 // splat (vector_bo L, R), Index -->
18221 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
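// e.g. (illustrative, v4i32 add with Index 2): the vector add of L and R is
// replaced by a single scalar add of lane 2 of each operand, and the scalar
// result is then re-splatted.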
18222 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18223 SDLoc DL(N);
18224 EVT EltVT = VT.getScalarType();
18225 SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18226 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18227 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
18228 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18229 N0.getNode()->getFlags());
18230 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
18231 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
18232 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18235 // If this is a bit convert that changes the element type of the vector but
18236 // not the number of vector elements, look through it. Be careful not to
18237 // look through conversions that change things like v4f32 to v2f64.
18238 SDNode *V = N0.getNode();
18239 if (V->getOpcode() == ISD::BITCAST) {
18240 SDValue ConvInput = V->getOperand(0);
18241 if (ConvInput.getValueType().isVector() &&
18242 ConvInput.getValueType().getVectorNumElements() == NumElts)
18243 V = ConvInput.getNode();
18246 if (V->getOpcode() == ISD::BUILD_VECTOR) {
18247 assert(V->getNumOperands() == NumElts &&
18248 "BUILD_VECTOR has wrong number of operands");
18249 SDValue Base;
18250 bool AllSame = true;
18251 for (unsigned i = 0; i != NumElts; ++i) {
18252 if (!V->getOperand(i).isUndef()) {
18253 Base = V->getOperand(i);
18254 break;
18257 // Splat of <u, u, u, u>, return <u, u, u, u>
18258 if (!Base.getNode())
18259 return N0;
18260 for (unsigned i = 0; i != NumElts; ++i) {
18261 if (V->getOperand(i) != Base) {
18262 AllSame = false;
18263 break;
18266 // Splat of <x, x, x, x>, return <x, x, x, x>
18267 if (AllSame)
18268 return N0;
18270 // Canonicalize any other splat as a build_vector.
18271 SDValue Splatted = V->getOperand(SplatIndex);
18272 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
18273 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
18275 // We may have jumped through bitcasts, so the type of the
18276 // BUILD_VECTOR may not match the type of the shuffle.
18277 if (V->getValueType(0) != VT)
18278 NewBV = DAG.getBitcast(VT, NewBV);
18279 return NewBV;
18283 // Simplify source operands based on shuffle mask.
18284 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18285 return SDValue(N, 0);
18287 // This is intentionally placed after demanded elements simplification because
18288 // it could eliminate knowledge of undef elements created by this shuffle.
18289 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
18290 return ShufOp;
18292 // Match shuffles that can be converted to any_vector_extend_in_reg.
18293 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
18294 return V;
18296 // Combine "truncate_vector_in_reg" style shuffles.
18297 if (SDValue V = combineTruncationShuffle(SVN, DAG))
18298 return V;
18300 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
18301 Level < AfterLegalizeVectorOps &&
18302 (N1.isUndef() ||
18303 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
18304 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
18305 if (SDValue V = partitionShuffleOfConcats(N, DAG))
18306 return V;
18309 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18310 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18311 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
18312 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
18313 return Res;
18315 // If this shuffle only has a single input that is a bitcasted shuffle,
18316 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
18317 // back to their original types.
18318 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
18319 N1.isUndef() && Level < AfterLegalizeVectorOps &&
18320 TLI.isTypeLegal(VT)) {
18321 auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
18322 if (Scale == 1)
18323 return SmallVector<int, 8>(Mask.begin(), Mask.end());
18325 SmallVector<int, 8> NewMask;
18326 for (int M : Mask)
18327 for (int s = 0; s != Scale; ++s)
18328 NewMask.push_back(M < 0 ? -1 : Scale * M + s);
18329 return NewMask;
18332 SDValue BC0 = peekThroughOneUseBitcasts(N0);
18333 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
18334 EVT SVT = VT.getScalarType();
18335 EVT InnerVT = BC0->getValueType(0);
18336 EVT InnerSVT = InnerVT.getScalarType();
18338 // Determine which shuffle works with the smaller scalar type.
18339 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
18340 EVT ScaleSVT = ScaleVT.getScalarType();
18342 if (TLI.isTypeLegal(ScaleVT) &&
18343 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
18344 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
18345 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
18346 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
18348 // Scale the shuffle masks to the smaller scalar type.
18349 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
18350 SmallVector<int, 8> InnerMask =
18351 ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
18352 SmallVector<int, 8> OuterMask =
18353 ScaleShuffleMask(SVN->getMask(), OuterScale);
18355 // Merge the shuffle masks.
18356 SmallVector<int, 8> NewMask;
18357 for (int M : OuterMask)
18358 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
18360 // Test for shuffle mask legality over both commutations.
18361 SDValue SV0 = BC0->getOperand(0);
18362 SDValue SV1 = BC0->getOperand(1);
18363 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
18364 if (!LegalMask) {
18365 std::swap(SV0, SV1);
18366 ShuffleVectorSDNode::commuteMask(NewMask);
18367 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
18370 if (LegalMask) {
18371 SV0 = DAG.getBitcast(ScaleVT, SV0);
18372 SV1 = DAG.getBitcast(ScaleVT, SV1);
18373 return DAG.getBitcast(
18374 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
18380 // Canonicalize shuffles according to rules:
18381 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
18382 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
18383 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
18384 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
18385 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
18386 TLI.isTypeLegal(VT)) {
18387 // The incoming shuffle must be of the same type as the result of the
18388 // current shuffle.
18389 assert(N1->getOperand(0).getValueType() == VT &&
18390 "Shuffle types don't match");
18392 SDValue SV0 = N1->getOperand(0);
18393 SDValue SV1 = N1->getOperand(1);
18394 bool HasSameOp0 = N0 == SV0;
18395 bool IsSV1Undef = SV1.isUndef();
18396 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
18397 // Commute the operands of this shuffle so that next rule
18398 // will trigger.
18399 return DAG.getCommutedVectorShuffle(*SVN);
18402 // Try to fold according to rules:
18403 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18404 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18405 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18406 // Don't try to fold shuffles with illegal type.
18407 // Only fold if this shuffle is the only user of the other shuffle.
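// e.g. (illustrative) shuffle (shuffle A, B, <0,5,2,7>), C, <1,0,3,2>
// can fold to the single node shuffle B, A, <1,4,3,6>: each outer lane is
// re-expressed directly in terms of A and B through the inner mask.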
18408 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
18409 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
18410 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
18412 // Don't try to fold splats; they're likely to simplify somehow, or they
18413 // might be free.
18414 if (OtherSV->isSplat())
18415 return SDValue();
18417 // The incoming shuffle must be of the same type as the result of the
18418 // current shuffle.
18419 assert(OtherSV->getOperand(0).getValueType() == VT &&
18420 "Shuffle types don't match");
18422 SDValue SV0, SV1;
18423 SmallVector<int, 4> Mask;
18424 // Compute the combined shuffle mask for a shuffle with SV0 as the first
18425 // operand, and SV1 as the second operand.
18426 for (unsigned i = 0; i != NumElts; ++i) {
18427 int Idx = SVN->getMaskElt(i);
18428 if (Idx < 0) {
18429 // Propagate Undef.
18430 Mask.push_back(Idx);
18431 continue;
18434 SDValue CurrentVec;
18435 if (Idx < (int)NumElts) {
18436 // This shuffle index refers to the inner shuffle N0. Lookup the inner
18437 // shuffle mask to identify which vector is actually referenced.
18438 Idx = OtherSV->getMaskElt(Idx);
18439 if (Idx < 0) {
18440 // Propagate Undef.
18441 Mask.push_back(Idx);
18442 continue;
18445 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
18446 : OtherSV->getOperand(1);
18447 } else {
18448 // This shuffle index references an element within N1.
18449 CurrentVec = N1;
18452 // Simple case where 'CurrentVec' is UNDEF.
18453 if (CurrentVec.isUndef()) {
18454 Mask.push_back(-1);
18455 continue;
18458 // Canonicalize the shuffle index. We don't know yet if CurrentVec
18459 // will be the first or second operand of the combined shuffle.
18460 Idx = Idx % NumElts;
18461 if (!SV0.getNode() || SV0 == CurrentVec) {
18462 // Ok. CurrentVec is the left hand side.
18463 // Update the mask accordingly.
18464 SV0 = CurrentVec;
18465 Mask.push_back(Idx);
18466 continue;
18469 // Bail out if we cannot convert the shuffle pair into a single shuffle.
18470 if (SV1.getNode() && SV1 != CurrentVec)
18471 return SDValue();
18473 // Ok. CurrentVec is the right hand side.
18474 // Update the mask accordingly.
18475 SV1 = CurrentVec;
18476 Mask.push_back(Idx + NumElts);
18479 // Check if all indices in Mask are Undef. If so, propagate Undef.
18480 bool isUndefMask = true;
18481 for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
18482 isUndefMask &= Mask[i] < 0;
18484 if (isUndefMask)
18485 return DAG.getUNDEF(VT);
18487 if (!SV0.getNode())
18488 SV0 = DAG.getUNDEF(VT);
18489 if (!SV1.getNode())
18490 SV1 = DAG.getUNDEF(VT);
18492 // Avoid introducing shuffles with illegal mask.
18493 if (!TLI.isShuffleMaskLegal(Mask, VT)) {
18494 ShuffleVectorSDNode::commuteMask(Mask);
18496 if (!TLI.isShuffleMaskLegal(Mask, VT))
18497 return SDValue();
18499 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
18500 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
18501 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
18502 std::swap(SV0, SV1);
18505 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18506 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18507 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18508 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
18511 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
18512 return V;
18514 return SDValue();
18517 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
18518 SDValue InVal = N->getOperand(0);
18519 EVT VT = N->getValueType(0);
18521 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
18522 // with a VECTOR_SHUFFLE and possible truncate.
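// e.g. scalar_to_vector (extract_vector_elt V, 2) --> shuffle V, undef,
// <2,u,u,u> for a v4 type (illustrative); only lane 0 of the result is
// defined.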
18523 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
18524 SDValue InVec = InVal->getOperand(0);
18525 SDValue EltNo = InVal->getOperand(1);
18526 auto InVecT = InVec.getValueType();
18527 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
18528 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
18529 int Elt = C0->getZExtValue();
18530 NewMask[0] = Elt;
18531 SDValue Val;
18532 // If we have an implicit truncate, do the truncate here as long as it's
18533 // legal; if it's not legal, don't combine here.
18534 if (VT.getScalarType() != InVal.getValueType() &&
18535 InVal.getValueType().isScalarInteger() &&
18536 isTypeLegal(VT.getScalarType())) {
18537 Val =
18538 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
18539 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
18541 if (VT.getScalarType() == InVecT.getScalarType() &&
18542 VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
18543 TLI.isShuffleMaskLegal(NewMask, VT)) {
18544 Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
18545 DAG.getUNDEF(InVecT), NewMask);
18546 // If the initial vector is the correct size this shuffle is a
18547 // valid result.
18548 if (VT == InVecT)
18549 return Val;
18550 // If not we must truncate the vector.
18551 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
18552 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
18553 SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
18554 EVT SubVT =
18555 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
18556 VT.getVectorNumElements());
18557 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
18558 ZeroIdx);
18559 return Val;
18565 return SDValue();
18568 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
18569 EVT VT = N->getValueType(0);
18570 SDValue N0 = N->getOperand(0);
18571 SDValue N1 = N->getOperand(1);
18572 SDValue N2 = N->getOperand(2);
18574 // If inserting an UNDEF, just return the original vector.
18575 if (N1.isUndef())
18576 return N0;
18578 // If this is an insert of an extracted vector into an undef vector, we can
18579 // just use the input to the extract.
18580 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18581 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
18582 return N1.getOperand(0);
18584 // If we are inserting a bitcast value into an undef, with the same
18585 // number of elements, just use the bitcast input of the extract.
18586 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
18587 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
18588 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
18589 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18590 N1.getOperand(0).getOperand(1) == N2 &&
18591 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
18592 VT.getVectorNumElements() &&
18593 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
18594 VT.getSizeInBits()) {
18595 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
18598 // If both N0 and N1 are bitcast values on which insert_subvector
18599 // would make sense, pull the bitcast through.
18600 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
18601 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
18602 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
18603 SDValue CN0 = N0.getOperand(0);
18604 SDValue CN1 = N1.getOperand(0);
18605 EVT CN0VT = CN0.getValueType();
18606 EVT CN1VT = CN1.getValueType();
18607 if (CN0VT.isVector() && CN1VT.isVector() &&
18608 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
18609 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
18610 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
18611 CN0.getValueType(), CN0, CN1, N2);
18612 return DAG.getBitcast(VT, NewINSERT);
18616 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
18617 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
18618 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
18619 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
18620 N0.getOperand(1).getValueType() == N1.getValueType() &&
18621 N0.getOperand(2) == N2)
18622 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
18623 N1, N2);
18625 // Eliminate an intermediate insert into an undef vector:
18626 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
18627 // insert_subvector undef, X, N2
18628 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
18629 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
18630 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
18631 N1.getOperand(1), N2);
18633 if (!isa<ConstantSDNode>(N2))
18634 return SDValue();
18636 unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
18638 // Canonicalize insert_subvector dag nodes.
18639 // Example:
18640 // (insert_subvector (insert_subvector A, Idx0), Idx1)
18641 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
18642 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
18643 N1.getValueType() == N0.getOperand(1).getValueType() &&
18644 isa<ConstantSDNode>(N0.getOperand(2))) {
18645 unsigned OtherIdx = N0.getConstantOperandVal(2);
18646 if (InsIdx < OtherIdx) {
18647 // Swap nodes.
18648 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
18649 N0.getOperand(0), N1, N2);
18650 AddToWorklist(NewOp.getNode());
18651 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
18652 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
18656 // If the input vector is a concatenation, and the insert replaces
18657 // one of the pieces, we can optimize into a single concat_vectors.
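// e.g. (illustrative, v4 subvectors): insert_subvector (concat_vectors A, B),
// C, 4 --> concat_vectors A, C, since index 4 with Factor == 4 replaces
// piece 1.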
18658 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
18659 N0.getOperand(0).getValueType() == N1.getValueType()) {
18660 unsigned Factor = N1.getValueType().getVectorNumElements();
18662 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
18663 Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
18665 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18668 // Simplify source operands based on insertion.
18669 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18670 return SDValue(N, 0);
18672 return SDValue();
18675 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
18676 SDValue N0 = N->getOperand(0);
18678 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
18679 if (N0->getOpcode() == ISD::FP16_TO_FP)
18680 return N0->getOperand(0);
18682 return SDValue();
18685 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
18686 SDValue N0 = N->getOperand(0);
18688 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
18689 if (N0->getOpcode() == ISD::AND) {
18690 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
18691 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
18692 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
18693 N0.getOperand(0));
18697 return SDValue();
18700 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
18701 SDValue N0 = N->getOperand(0);
18702 EVT VT = N0.getValueType();
18703 unsigned Opcode = N->getOpcode();
18705 // VECREDUCE over a 1-element vector is just an extract.
18706 if (VT.getVectorNumElements() == 1) {
18707 SDLoc dl(N);
18708 SDValue Res = DAG.getNode(
18709 ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
18710 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
18711 if (Res.getValueType() != N->getValueType(0))
18712 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
18713 return Res;
18716 // On a boolean vector an and/or reduction is the same as a umin/umax
18717 // reduction. Convert them if the latter is legal while the former isn't.
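// e.g. with lanes known to be all-ones or all-zero (the sign-bits check
// below), umin over the lanes is all-ones iff every lane is, matching AND,
// and umax is non-zero iff any lane is, matching OR.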
18718 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
18719 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
18720 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
18721 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
18722 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
18723 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
18724 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
18727 return SDValue();
18730 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
18731 /// with the destination vector and a zero vector.
18732 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
18733 /// vector_shuffle V, Zero, <0, 4, 2, 4>
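/// Masks that only clear part of an element are handled by splitting each
/// element into sub-elements, e.g. (illustrative, little-endian v2i32):
/// AND V, <0x0000ffff, 0xffffffff> becomes a v4i16 shuffle of (bitcast V)
/// and zero with mask <0, 5, 2, 3>.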
18734 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
18735 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
18737 EVT VT = N->getValueType(0);
18738 SDValue LHS = N->getOperand(0);
18739 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
18740 SDLoc DL(N);
18742 // Make sure we're not running after operation legalization where it
18743 // may have custom lowered the vector shuffles.
18744 if (LegalOperations)
18745 return SDValue();
18747 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
18748 return SDValue();
18750 EVT RVT = RHS.getValueType();
18751 unsigned NumElts = RHS.getNumOperands();
18753 // Attempt to create a valid clear mask, splitting the mask into
18754 // sub elements and checking to see if each is
18755 // all zeros or all ones - suitable for shuffle masking.
18756 auto BuildClearMask = [&](int Split) {
18757 int NumSubElts = NumElts * Split;
18758 int NumSubBits = RVT.getScalarSizeInBits() / Split;
18760 SmallVector<int, 8> Indices;
18761 for (int i = 0; i != NumSubElts; ++i) {
18762 int EltIdx = i / Split;
18763 int SubIdx = i % Split;
18764 SDValue Elt = RHS.getOperand(EltIdx);
18765 if (Elt.isUndef()) {
18766 Indices.push_back(-1);
18767 continue;
18770 APInt Bits;
18771 if (isa<ConstantSDNode>(Elt))
18772 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
18773 else if (isa<ConstantFPSDNode>(Elt))
18774 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
18775 else
18776 return SDValue();
18778 // Extract the sub element from the constant bit mask.
18779 if (DAG.getDataLayout().isBigEndian()) {
18780 Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
18781 } else {
18782 Bits.lshrInPlace(SubIdx * NumSubBits);
18785 if (Split > 1)
18786 Bits = Bits.trunc(NumSubBits);
18788 if (Bits.isAllOnesValue())
18789 Indices.push_back(i);
18790 else if (Bits == 0)
18791 Indices.push_back(i + NumSubElts);
18792 else
18793 return SDValue();
18796 // Let's see if the target supports this vector_shuffle.
18797 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
18798 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
18799 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
18800 return SDValue();
18802 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
18803 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
18804 DAG.getBitcast(ClearVT, LHS),
18805 Zero, Indices));
18808 // Determine maximum split level (byte level masking).
18809 int MaxSplit = 1;
18810 if (RVT.getScalarSizeInBits() % 8 == 0)
18811 MaxSplit = RVT.getScalarSizeInBits() / 8;
18813 for (int Split = 1; Split <= MaxSplit; ++Split)
18814 if (RVT.getScalarSizeInBits() % Split == 0)
18815 if (SDValue S = BuildClearMask(Split))
18816 return S;
18818 return SDValue();
18821 /// If a vector binop is performed on splat values, it may be profitable to
18822 /// extract, scalarize, and insert/splat.
18823 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
18824 SDValue N0 = N->getOperand(0);
18825 SDValue N1 = N->getOperand(1);
18826 unsigned Opcode = N->getOpcode();
18827 EVT VT = N->getValueType(0);
18828 EVT EltVT = VT.getVectorElementType();
18829 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18831 // TODO: Remove/replace the extract cost check? If the elements are available
18832 // as scalars, then there may be no extract cost. Should we ask if
18833 // inserting a scalar back into a vector is cheap instead?
18834 int Index0, Index1;
18835 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
18836 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
18837 if (!Src0 || !Src1 || Index0 != Index1 ||
18838 Src0.getValueType().getVectorElementType() != EltVT ||
18839 Src1.getValueType().getVectorElementType() != EltVT ||
18840 !TLI.isExtractVecEltCheap(VT, Index0) ||
18841 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
18842 return SDValue();
18844 SDLoc DL(N);
18845 SDValue IndexC =
18846 DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
18847 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
18848 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
18849 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
18851 // If all lanes but 1 are undefined, no need to splat the scalar result.
18852 // TODO: Keep track of undefs and use that info in the general case.
18853 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
18854 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
18855 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
18856 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
18857 // build_vec ..undef, (bo X, Y), undef...
18858 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
18859 Ops[Index0] = ScalarBO;
18860 return DAG.getBuildVector(VT, DL, Ops);
18863 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
18864 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
18865 return DAG.getBuildVector(VT, DL, Ops);
18868 /// Visit a binary vector operation, like ADD.
18869 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
18870 assert(N->getValueType(0).isVector() &&
18871 "SimplifyVBinOp only works on vectors!");
18873 SDValue LHS = N->getOperand(0);
18874 SDValue RHS = N->getOperand(1);
18875 SDValue Ops[] = {LHS, RHS};
18876 EVT VT = N->getValueType(0);
18877 unsigned Opcode = N->getOpcode();
18879 // See if we can constant fold the vector operation.
18880 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
18881 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
18882 return Fold;
18884 // Move unary shuffles with identical masks after a vector binop:
18885 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
18886 // --> shuffle (VBinOp A, B), Undef, Mask
18887 // This does not require type legality checks because we are creating the
18888 // same types of operations that are in the original sequence. We do have to
18889 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
18890 // though. This code is adapted from the identical transform in instcombine.
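// e.g. add (shuffle A, undef, <1,0>), (shuffle B, undef, <1,0>)
// --> shuffle (add A, B), undef, <1,0>, leaving one shuffle instead of two.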
18891 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
18892 Opcode != ISD::UREM && Opcode != ISD::SREM &&
18893 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
18894 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
18895 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
18896 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
18897 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
18898 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
18899 SDLoc DL(N);
18900 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
18901 RHS.getOperand(0), N->getFlags());
18902 SDValue UndefV = LHS.getOperand(1);
18903 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
18907 // The following pattern is likely to emerge with vector reduction ops. Moving
18908 // the binary operation ahead of insertion may allow using a narrower vector
18909 // instruction that has better performance than the wide version of the op:
18910 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
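// e.g. (illustrative, v8i32 built from v4i32 halves):
// add (ins undef, X, 0), (ins undef, Y, 0)
// --> ins (add undef, undef), (add X, Y), 0, where only the narrow v4i32
// add does real work and the wide add of undefs is computed separately
// (it may fold away).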
18911 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
18912 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
18913 LHS.getOperand(2) == RHS.getOperand(2) &&
18914 (LHS.hasOneUse() || RHS.hasOneUse())) {
18915 SDValue X = LHS.getOperand(1);
18916 SDValue Y = RHS.getOperand(1);
18917 SDValue Z = LHS.getOperand(2);
18918 EVT NarrowVT = X.getValueType();
18919 if (NarrowVT == Y.getValueType() &&
18920 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
18921 // (binop undef, undef) may not return undef, so compute that result.
18922 SDLoc DL(N);
18923 SDValue VecC =
18924 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
18925 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
18926 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
18930 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
18931 return V;
18933 return SDValue();
18936 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
18937 SDValue N2) {
18938 assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
18940 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
18941 cast<CondCodeSDNode>(N0.getOperand(2))->get());
18943 // If we got a simplified select_cc node back from SimplifySelectCC, then
18944 // break it down into a new SETCC node, and a new SELECT node, and then return
18945 // the SELECT node, since we were called with a SELECT node.
18946 if (SCC.getNode()) {
18947 // Check to see if we got a select_cc back (to turn into setcc/select).
18948 // Otherwise, just return whatever node we got back, like fabs.
18949 if (SCC.getOpcode() == ISD::SELECT_CC) {
18950 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
18951 N0.getValueType(),
18952 SCC.getOperand(0), SCC.getOperand(1),
18953 SCC.getOperand(4));
18954 AddToWorklist(SETCC.getNode());
18955 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
18956 SCC.getOperand(2), SCC.getOperand(3));
18959 return SCC;
18961 return SDValue();
18964 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
18965 /// being selected between, see if we can simplify the select. Callers of this
18966 /// should assume that TheSelect is deleted if this returns true. As such, they
18967 /// should return the appropriate thing (e.g. the node) back to the top-level of
18968 /// the DAG combiner loop to avoid it being looked at.
18969 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
18970 SDValue RHS) {
18971 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18972 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
18973 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
18974 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
18975 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
18976 SDValue Sqrt = RHS;
18977 ISD::CondCode CC;
18978 SDValue CmpLHS;
18979 const ConstantFPSDNode *Zero = nullptr;
18981 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
18982 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
18983 CmpLHS = TheSelect->getOperand(0);
18984 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
18985 } else {
18986 // SELECT or VSELECT
18987 SDValue Cmp = TheSelect->getOperand(0);
18988 if (Cmp.getOpcode() == ISD::SETCC) {
18989 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
18990 CmpLHS = Cmp.getOperand(0);
18991 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
18994 if (Zero && Zero->isZero() &&
18995 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
18996 CC == ISD::SETULT || CC == ISD::SETLT)) {
18997 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18998 CombineTo(TheSelect, Sqrt);
18999 return true;
19003 // Cannot simplify select with vector condition
19004 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
19006 // If this is a select from two identical things, try to pull the operation
19007 // through the select.
19008 if (LHS.getOpcode() != RHS.getOpcode() ||
19009 !LHS.hasOneUse() || !RHS.hasOneUse())
19010 return false;
19012 // If this is a load and the token chain is identical, replace the select
19013 // of two loads with a load through a select of the address to load from.
19014 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19015 // constants have been dropped into the constant pool.
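// e.g. select C, (load p), (load q) --> load (select C, p, q), provided the
// chains match and the safety checks below all pass.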
19016 if (LHS.getOpcode() == ISD::LOAD) {
19017 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19018 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19020 // Token chains must be identical.
19021 if (LHS.getOperand(0) != RHS.getOperand(0) ||
19022 // Do not let this transformation reduce the number of volatile loads.
19023 LLD->isVolatile() || RLD->isVolatile() ||
19024 // FIXME: If either is a pre/post inc/dec load,
19025 // we'd need to split out the address adjustment.
19026 LLD->isIndexed() || RLD->isIndexed() ||
19027 // If this is an EXTLOAD, the VT's must match.
19028 LLD->getMemoryVT() != RLD->getMemoryVT() ||
19029 // If this is an EXTLOAD, the kind of extension must match.
19030 (LLD->getExtensionType() != RLD->getExtensionType() &&
19031 // The only exception is if one of the extensions is anyext.
19032 LLD->getExtensionType() != ISD::EXTLOAD &&
19033 RLD->getExtensionType() != ISD::EXTLOAD) ||
19034 // FIXME: this discards src value information. This is
19035 // over-conservative. It would be beneficial to be able to remember
19036 // both potential memory locations. Since we are discarding
19037 // src value info, don't do the transformation if the memory
19038 // locations are not in the default address space.
19039 LLD->getPointerInfo().getAddrSpace() != 0 ||
19040 RLD->getPointerInfo().getAddrSpace() != 0 ||
19041 // We can't produce a CMOV of a TargetFrameIndex since we won't
19042 // generate the address generation required.
19043 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19044 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19045 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19046 LLD->getBasePtr().getValueType()))
19047 return false;
19049 // The loads must not depend on one another.
19050 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19051 return false;
19053 // Check that the select condition doesn't reach either load. If so,
19054 // folding this will induce a cycle into the DAG. If not, this is safe to
19055 // xform, so create a select of the addresses.
19057 SmallPtrSet<const SDNode *, 32> Visited;
19058 SmallVector<const SDNode *, 16> Worklist;
19060 // Always fail if LLD and RLD are not independent. TheSelect is a
19061 // predecessor to all Nodes in question so we need not search past it.
19063 Visited.insert(TheSelect);
19064 Worklist.push_back(LLD);
19065 Worklist.push_back(RLD);
19067 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19068 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19069 return false;
19071 SDValue Addr;
19072 if (TheSelect->getOpcode() == ISD::SELECT) {
19073 // We cannot do this optimization if any pair of {RLD, LLD} is a
19074 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19075 // Loads, we only need to check if CondNode is a successor to one of the
19076 // loads. We can further avoid this if there's no use of their chain
19077 // value.
19078 SDNode *CondNode = TheSelect->getOperand(0).getNode();
19079 Worklist.push_back(CondNode);
19081 if ((LLD->hasAnyUseOfValue(1) &&
19082 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19083 (RLD->hasAnyUseOfValue(1) &&
19084 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19085 return false;
19087 Addr = DAG.getSelect(SDLoc(TheSelect),
19088 LLD->getBasePtr().getValueType(),
19089 TheSelect->getOperand(0), LLD->getBasePtr(),
19090 RLD->getBasePtr());
19091 } else { // Otherwise SELECT_CC
19092 // We cannot do this optimization if any pair of {RLD, LLD} is a
19093 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19094 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19095 // one of the loads. We can further avoid this if there's no use of their
19096 // chain value.
19098 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19099 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19100 Worklist.push_back(CondLHS);
19101 Worklist.push_back(CondRHS);
19103 if ((LLD->hasAnyUseOfValue(1) &&
19104 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19105 (RLD->hasAnyUseOfValue(1) &&
19106 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19107 return false;
19109 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19110 LLD->getBasePtr().getValueType(),
19111 TheSelect->getOperand(0),
19112 TheSelect->getOperand(1),
19113 LLD->getBasePtr(), RLD->getBasePtr(),
19114 TheSelect->getOperand(4));
19117 SDValue Load;
19118 // It is safe to replace the two loads if they have different alignments,
19119 // but the new load must be the minimum (most restrictive) alignment of the
19120 // inputs.
19121 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
19122 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19123 if (!RLD->isInvariant())
19124 MMOFlags &= ~MachineMemOperand::MOInvariant;
19125 if (!RLD->isDereferenceable())
19126 MMOFlags &= ~MachineMemOperand::MODereferenceable;
19127 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19128 // FIXME: Discards pointer and AA info.
19129 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19130 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19131 MMOFlags);
19132 } else {
19133 // FIXME: Discards pointer and AA info.
19134 Load = DAG.getExtLoad(
19135 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
19136 : LLD->getExtensionType(),
19137 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19138 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19141 // Users of the select now use the result of the load.
19142 CombineTo(TheSelect, Load);
19144 // Users of the old loads now use the new load's chain. We know the
19145 // old-load value is dead now.
19146 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19147 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19148 return true;
19151 return false;
19154 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19155 /// bitwise 'and'.
19156 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19157 SDValue N1, SDValue N2, SDValue N3,
19158 ISD::CondCode CC) {
19159 // If this is a select where the false operand is zero and the compare is a
19160 // check of the sign bit, see if we can perform the "gzip trick":
19161 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19162 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
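// e.g. (i32, illustrative): for select_cc setlt X, 0, A, 0, the shift
// sra X, 31 is all-ones exactly when X is negative, so the AND yields A for
// X < 0 and 0 otherwise.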
19163 EVT XType = N0.getValueType();
19164 EVT AType = N2.getValueType();
19165 if (!isNullConstant(N3) || !XType.bitsGE(AType))
19166 return SDValue();
19168 // If the comparison is testing for a positive value, we have to invert
19169 // the sign bit mask, so only do that transform if the target has a bitwise
19170 // 'and not' instruction (the invert is free).
19171 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
19172 // (X > -1) ? A : 0
19173 // (X > 0) ? X : 0 <-- This is canonical signed max.
19174 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
19175 return SDValue();
19176 } else if (CC == ISD::SETLT) {
19177 // (X < 0) ? A : 0
19178 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
19179 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
19180 return SDValue();
19181 } else {
19182 return SDValue();
19185 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
19186 // constant.
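// e.g. (i32, illustrative) with A = 8 (only bit 3 set): the sign bit must
// land on bit 3, so the shift amount below is 32 - log2(8) - 1 = 28.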
19187 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
19188 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19189 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
19190 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
19191 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
19192 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
19193 AddToWorklist(Shift.getNode());
19195 if (XType.bitsGT(AType)) {
19196 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19197 AddToWorklist(Shift.getNode());
19200 if (CC == ISD::SETGT)
19201 Shift = DAG.getNOT(DL, Shift, AType);
19203 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19206 SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
19207 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
19208 AddToWorklist(Shift.getNode());
19210 if (XType.bitsGT(AType)) {
19211 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19212 AddToWorklist(Shift.getNode());
19215 if (CC == ISD::SETGT)
19216 Shift = DAG.getNOT(DL, Shift, AType);
19218 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19221 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
19222 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
19223 /// in it. This may be a win when the constant is not otherwise available
19224 /// because it replaces two constant pool loads with one.
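/// e.g. (illustrative, f32): {2.0f, 1.0f} is emitted as a single constant-pool
/// array and the compare merely selects the load offset (4 or 0) into it.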
19225 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
19226 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
19227 ISD::CondCode CC) {
19228 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
19229 return SDValue();
19231 // If we are before legalize types, we want the other legalization to happen
19232 // first (for example, to avoid messing with soft float).
19233 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
19234 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
19235 EVT VT = N2.getValueType();
19236 if (!TV || !FV || !TLI.isTypeLegal(VT))
19237 return SDValue();
19239 // If a constant can be materialized without loads, this does not make sense.
19240 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
19241 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
19242 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
19243 return SDValue();
19245 // If both constants have multiple uses, then we won't need to do an extra
19246 // load. The values are likely around in registers for other users.
19247 if (!TV->hasOneUse() && !FV->hasOneUse())
19248 return SDValue();
19250 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
19251 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
19252 Type *FPTy = Elts[0]->getType();
19253 const DataLayout &TD = DAG.getDataLayout();
19255 // Create a ConstantArray of the two constants.
19256 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
19257 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
19258 TD.getPrefTypeAlignment(FPTy));
19259 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
19261 // Get offsets to the 0 and 1 elements of the array, so we can select between
19262 // them.
19263 SDValue Zero = DAG.getIntPtrConstant(0, DL);
19264 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
19265 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
19266 SDValue Cond =
19267 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
19268 AddToWorklist(Cond.getNode());
19269 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
19270 AddToWorklist(CstOffset.getNode());
19271 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
19272 AddToWorklist(CPIdx.getNode());
19273 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
19274 MachinePointerInfo::getConstantPool(
19275 DAG.getMachineFunction()), Alignment);
19278 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
19279 /// where 'cond' is the comparison specified by CC.
19280 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
19281 SDValue N2, SDValue N3, ISD::CondCode CC,
19282 bool NotExtCompare) {
19283 // (x ? y : y) -> y.
19284 if (N2 == N3) return N2;
19286 EVT CmpOpVT = N0.getValueType();
19287 EVT CmpResVT = getSetCCResultType(CmpOpVT);
19288 EVT VT = N2.getValueType();
19289 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
19290 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19291 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
19293 // Determine if the condition we're dealing with is constant.
19294 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
19295 AddToWorklist(SCC.getNode());
19296 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
19297 // fold select_cc true, x, y -> x
19298 // fold select_cc false, x, y -> y
19299 return !(SCCC->isNullValue()) ? N2 : N3;
19303 if (SDValue V =
19304 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
19305 return V;
19307 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
19308 return V;
19310 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
19311 // where y has a single bit set.
19312 // A plaintext description would be: we can turn the SELECT_CC into an AND
19313 // when the condition can be materialized as an all-ones register. Any
19314 // single bit-test can be materialized as an all-ones register with
19315 // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetically right shift it all the way over, so the result is
      // either all-ones or zero.
      SDValue ShrAmt =
          DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                          getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }
  // fold select C, 16, 0 -> shl C, 4
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {

    if (Swap) {
      CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
      if (VT.bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    } else {
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    if (N2C->isOne())
      return Temp;

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
                                       SDLoc(Temp),
                                       getShiftAmountTy(Temp.getValueType())));
  }
  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is SETNE instead of SETEQ, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &DL,
                                   bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, Level, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
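/// For example (i32, illustrative): for a divide by 3, the magic number is
/// 0x55555556, and x/3 becomes roughly
///   q = (int32_t)(((int64_t)0x55555556 * x) >> 32);
///   q += (uint32_t)x >> 31; // round toward zero for negative x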
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by a constant power of 2,
/// return a DAG expression that will generate the same value by right
/// shifting.
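/// For example (i32, illustrative): x/8 becomes
///   (x + ((x >> 31) >>u 29)) >> 3
/// where the added term rounds negative values toward zero.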
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
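/// For example (i32, illustrative): for an unsigned divide by 3, the magic
/// number is 0xAAAAAAAB, and x/3 becomes roughly
///   q = (uint32_t)(((uint64_t)0xAAAAAAAB * x) >> 33);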
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
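/// For example (i32, illustrative): for V == 16, ctlz(16) == 27 and
/// LogBase2(16) == 31 - 27 == 4.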
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
  EVT VT = V.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
  return LogBase2;
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = 1/X - A [which has a zero at X = 1/A]
/// =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///   does not require additional intermediate precision]
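/// For example (illustrative): with A == 3 and initial estimate X_0 == 0.3,
/// X_1 == 0.3 * (2 - 0.9) == 0.33 and X_2 == 0.33 * (2 - 0.99) == 0.3333,
/// converging quadratically towards 1/3.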
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      SDLoc DL(Op);
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (1 - Arg * Est)
      for (int i = 0; i < Iterations; ++i) {
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    }
    return Est;
  }

  return SDValue();
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
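/// For example (illustrative): with A == 4 and initial estimate X_0 == 0.6,
/// X_1 == 0.6 * (1.5 - 2.0 * 0.36) == 0.468, already close to
/// 1/sqrt(4) == 0.5.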
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
    AddToWorklist(Est.getNode());
  }

  // If non-reciprocal square root is requested, multiply the result by Arg.
  if (!Reciprocal) {
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
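/// For example (illustrative): with A == 4 and X_0 == 0.6,
/// X_1 == (-0.5 * 0.6) * (4 * 0.36 - 3.0) == (-0.3) * (-1.56) == 0.468,
/// matching the one-constant form above.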
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed
/// if Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  bool UseOneConstNR = false;
  if (SDValue Est =
          TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                              Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
                ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
                : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
          AddToWorklist(Fabs.getNode());
          AddToWorklist(IsDenorm.getNode());
          AddToWorklist(Est.getNode());
        } else {
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
          AddToWorklist(IsZero.getNode());
          AddToWorklist(Est.getNode());
        }
      }
    }
    return Est;
  }

  return SDValue();
}
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, true);
}

SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {

  struct MemUseCharacteristics {
    bool IsVolatile;
    SDValue BasePtr;
    int64_t Offset;
    Optional<int64_t> NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
    if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
        Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
                     ? C->getSExtValue()
                     : (LSN->getAddressingMode() == ISD::PRE_DEC)
                           ? -1 * C->getSExtValue()
                           : 0;
      return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
              LSN->getMemOperand()};
    }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
      return {false /*isVolatile*/, LN->getOperand(1),
              (LN->hasOffset()) ? LN->getOffset() : 0,
              (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
                                : Optional<int64_t>(),
              (MachineMemOperand *)nullptr};
    // Default.
    return {false /*isVolatile*/, SDValue(), (int64_t)0 /*offset*/,
            Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
  };

  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
                        MUC1 = getCharacteristics(Op1);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  bool IsAlias;
  if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
                                       DAG, IsAlias) &&
      !IsAlias)
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid. Fail conservatively
  // if either is not known.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // If one operation reads from invariant memory, and the other may store,
  // they cannot alias. These should really be checking the equivalent of
  // mayWrite, but it only matters for memory nodes other than load/store.
  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
      (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
    return false;

  // If we know the required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
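  // For example (illustrative): two 4-byte accesses whose base alignment is 8
  // and whose SrcValue offsets are 0 and 4 get OffAlign values 0 and 4, so
  // (0 + 4) <= 4 proves they cannot overlap.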
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
  unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
      *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
        (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
      return false;
  }

  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
    AliasResult AAResult = AA->alias(
        MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                       UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
        MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                       UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;    // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.

  // Get alias information for node.
  const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Attempt to improve chain by a single step.
  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      !cast<LSBaseSDNode>(C.getNode())->isVolatile();
      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      return false;
    }
  };

  // Look at each chain and determine if it is an alias. If so, add it to the
  // aliases list. If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up. Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE).
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }
    // Everything else.
    if (ImproveChain(Chain)) {
      // Updated chain found; consider the new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      ++Depth;
      continue;
    }
    // No improved chain possible, so treat it as an alias.
    Aliases.push_back(Chain);
  }
}
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node).
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  if (OptLevel == CodeGenOpt::None)
    return OldChain;

  // Ops for replacing token factor.
  SmallVector<SDValue, 8> Aliases;

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it. We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getTokenFactor(SDLoc(N), Aliases);
}
namespace {

// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;

bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }

} // namespace
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In the
  // common case, each store encountered writes immediately before the range
  // already covered, and is thus merged with the existing interval at
  // insertion time.
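  // For example (illustrative): 4-byte stores at offsets 8, 4 and then 0 grow
  // a single interval [8, 12) -> [4, 12) -> [0, 12) instead of creating three
  // separate intervals.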
  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Add ST's interval.
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    if (Chain->isVolatile() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return a single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  AddToWorklist(STChain);
  // Add TF's operands to the worklist in reverse order.
  for (auto I = TF->getNumOperands(); I;)
    AddToWorklist(TF->getOperand(--I).getNode());
  AddToWorklist(TF.getNode());
  return true;
}
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Directly improve a chain of disjoint stores starting at St.
  if (parallelizeChainedStores(St))
    return true;

  // Improve St's chain.
  SDValue BetterChain = FindBetterChain(St, St->getChain());
  if (St->getChain() != BetterChain) {
    replaceStoreChain(St, BetterChain);
    return true;
  }
  return false;
}
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
  /// This is the main entry point to this class.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}