1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
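//
// For example, a combine here may fold away a redundant node such as
// (add x, 0) -> x, or merge a run of consecutive narrow stores into one
// wider store. (Illustrative examples only; the full set of combines is
// defined by the visit* routines below.)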
16 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/Analysis/TargetLibraryInfo.h"
35 #include "llvm/Analysis/VectorUtils.h"
36 #include "llvm/CodeGen/DAGCombine.h"
37 #include "llvm/CodeGen/ISDOpcodes.h"
38 #include "llvm/CodeGen/MachineFrameInfo.h"
39 #include "llvm/CodeGen/MachineFunction.h"
40 #include "llvm/CodeGen/MachineMemOperand.h"
41 #include "llvm/CodeGen/RuntimeLibcalls.h"
42 #include "llvm/CodeGen/SelectionDAG.h"
43 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
44 #include "llvm/CodeGen/SelectionDAGNodes.h"
45 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
46 #include "llvm/CodeGen/TargetLowering.h"
47 #include "llvm/CodeGen/TargetRegisterInfo.h"
48 #include "llvm/CodeGen/TargetSubtargetInfo.h"
49 #include "llvm/CodeGen/ValueTypes.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/Constant.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CodeGen.h"
59 #include "llvm/Support/CommandLine.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/ErrorHandling.h"
63 #include "llvm/Support/KnownBits.h"
64 #include "llvm/Support/MachineValueType.h"
65 #include "llvm/Support/MathExtras.h"
66 #include "llvm/Support/raw_ostream.h"
67 #include "llvm/Target/TargetMachine.h"
68 #include "llvm/Target/TargetOptions.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <functional>
73 #include <iterator>
74 #include <string>
75 #include <tuple>
76 #include <utility>
78 using namespace llvm;
80 #define DEBUG_TYPE "dagcombine"
82 STATISTIC(NodesCombined , "Number of dag nodes combined");
83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87 STATISTIC(SlicedLoads, "Number of loads sliced");
88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
90 static cl::opt<bool>
91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
94 static cl::opt<bool>
95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
98 #ifndef NDEBUG
99 static cl::opt<std::string>
100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103 #endif
105 /// Hidden option to stress test load slicing, i.e., when this option
106 /// is enabled, load slicing bypasses most of its profitability guards.
107 static cl::opt<bool>
108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
112 static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
116 static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
121 static cl::opt<unsigned> TokenFactorInlineLimit(
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
125 static cl::opt<unsigned> StoreMergeDependenceLimit(
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
130 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG combiner enable reducing the width of load/op/store "
133 "sequence"));
135 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG combiner enable load/<replace bytes>/store with "
138 "a narrower store"));
140 namespace {
142 class DAGCombiner {
143 SelectionDAG &DAG;
144 const TargetLowering &TLI;
145 const SelectionDAGTargetInfo *STI;
146 CombineLevel Level;
147 CodeGenOpt::Level OptLevel;
148 bool LegalDAG = false;
149 bool LegalOperations = false;
150 bool LegalTypes = false;
151 bool ForCodeSize;
152 bool DisableGenericCombines;
154 /// Worklist of all of the nodes that need to be simplified.
156 /// This must behave as a stack -- new nodes to process are pushed onto the
157 /// back and when processing we pop off of the back.
159 /// The worklist will not contain duplicates but may contain null entries
160 /// due to nodes being deleted from the underlying DAG.
161 SmallVector<SDNode *, 64> Worklist;
163 /// Mapping from an SDNode to its position on the worklist.
165 /// This is used to find and remove nodes from the worklist (by nulling
166 /// them) when they are deleted from the underlying DAG. It relies on
167 /// stable indices of nodes within the worklist.
168 DenseMap<SDNode *, unsigned> WorklistMap;
169 /// This records all nodes attempted to be added to the worklist since we
170 /// last considered a new worklist entry. Because we do not add duplicate
171 /// nodes to the worklist, this is different from the tail of the worklist.
172 SmallSetVector<SDNode *, 32> PruningList;
174 /// Set of nodes which have been combined (at least once).
176 /// This is used to allow us to reliably add any operands of a DAG node
177 /// which have not yet been combined to the worklist.
178 SmallPtrSet<SDNode *, 32> CombinedNodes;
180 /// Map from candidate StoreNode to the pair of RootNode and count.
181 /// The count is used to track how many times we have seen the StoreNode
182 /// with the same RootNode bail out in the dependence check. Once the same
183 /// pair has bailed out more times than the limit allows, we no longer
184 /// consider the StoreNode with that RootNode as a store merging
185 /// candidate.
186 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
188 // AA - Used for DAG load/store alias analysis.
189 AliasAnalysis *AA;
191 /// When an instruction is simplified, add all users of the instruction to
192 /// the worklist because they might now be simplified further.
193 void AddUsersToWorklist(SDNode *N) {
194 for (SDNode *Node : N->uses())
195 AddToWorklist(Node);
198 /// Convenient shorthand to add a node and all of its users to the worklist.
199 void AddToWorklistWithUsers(SDNode *N) {
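// Add the users first so that N itself is pushed last; if it was not
// already on the worklist it then sits at the back and will be the next
// node processed.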
200 AddUsersToWorklist(N);
201 AddToWorklist(N);
204 // Prune potentially dangling nodes. This is called after
205 // any visit to a node, but should also be called during a visit after any
206 // failed combine which may have created a DAG node.
207 void clearAddedDanglingWorklistEntries() {
208 // Check any nodes added to the worklist to see if they are prunable.
209 while (!PruningList.empty()) {
210 auto *N = PruningList.pop_back_val();
211 if (N->use_empty())
212 recursivelyDeleteUnusedNodes(N);
216 SDNode *getNextWorklistEntry() {
217 // Before we do any work, remove nodes that are not in use.
218 clearAddedDanglingWorklistEntries();
219 SDNode *N = nullptr;
220 // The Worklist holds the SDNodes in order, but it may contain null
221 // entries.
222 while (!N && !Worklist.empty()) {
223 N = Worklist.pop_back_val();
226 if (N) {
227 bool GoodWorklistEntry = WorklistMap.erase(N);
228 (void)GoodWorklistEntry;
229 assert(GoodWorklistEntry &&
230 "Found a worklist entry without a corresponding map entry!");
232 return N;
235 /// Call the node-specific routine that folds each particular type of node.
236 SDValue visit(SDNode *N);
238 public:
239 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240 : DAG(D), TLI(D.getTargetLoweringInfo()),
241 STI(D.getSubtarget().getSelectionDAGInfo()),
242 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243 ForCodeSize = DAG.shouldOptForSize();
244 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
246 MaximumLegalStoreInBits = 0;
247 // We use the minimum store size here, since that's all we can guarantee
248 // for the scalable vector types.
249 for (MVT VT : MVT::all_valuetypes())
250 if (EVT(VT).isSimple() && VT != MVT::Other &&
251 TLI.isTypeLegal(EVT(VT)) &&
252 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
256 void ConsiderForPruning(SDNode *N) {
257 // Mark this for potential pruning.
258 PruningList.insert(N);
261 /// Add to the worklist, making sure its instance is at the back (next to
262 /// be processed).
263 void AddToWorklist(SDNode *N) {
264 assert(N->getOpcode() != ISD::DELETED_NODE &&
265 "Deleted Node added to Worklist");
267 // Skip handle nodes as they can't usefully be combined and confuse the
268 // zero-use deletion strategy.
269 if (N->getOpcode() == ISD::HANDLENODE)
270 return;
272 ConsiderForPruning(N);
274 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275 Worklist.push_back(N);
278 /// Remove all instances of N from the worklist.
279 void removeFromWorklist(SDNode *N) {
280 CombinedNodes.erase(N);
281 PruningList.remove(N);
282 StoreRootCountMap.erase(N);
284 auto It = WorklistMap.find(N);
285 if (It == WorklistMap.end())
286 return; // Not in the worklist.
288 // Null out the entry rather than erasing it to avoid a linear operation.
289 Worklist[It->second] = nullptr;
290 WorklistMap.erase(It);
293 void deleteAndRecombine(SDNode *N);
294 bool recursivelyDeleteUnusedNodes(SDNode *N);
296 /// Replaces all uses of the results of one DAG node with new values.
297 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 bool AddTo = true);
300 /// Replaces all uses of the results of one DAG node with new values.
301 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302 return CombineTo(N, &Res, 1, AddTo);
305 /// Replaces all uses of the results of one DAG node with new values.
306 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307 bool AddTo = true) {
308 SDValue To[] = { Res0, Res1 };
309 return CombineTo(N, To, 2, AddTo);
312 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
314 private:
315 unsigned MaximumLegalStoreInBits;
317 /// Check the specified integer node value to see if it can be simplified or
318 /// if things it uses can be simplified by bit propagation.
319 /// If so, return true.
320 bool SimplifyDemandedBits(SDValue Op) {
321 unsigned BitWidth = Op.getScalarValueSizeInBits();
322 APInt DemandedBits = APInt::getAllOnes(BitWidth);
323 return SimplifyDemandedBits(Op, DemandedBits);
326 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328 KnownBits Known;
329 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330 return false;
332 // Revisit the node.
333 AddToWorklist(Op.getNode());
335 CommitTargetLoweringOpt(TLO);
336 return true;
339 /// Check the specified vector node value to see if it can be simplified or
340 /// if things it uses can be simplified as it only uses some of the
341 /// elements. If so, return true.
342 bool SimplifyDemandedVectorElts(SDValue Op) {
343 // TODO: For now just pretend it cannot be simplified.
344 if (Op.getValueType().isScalableVector())
345 return false;
347 unsigned NumElts = Op.getValueType().getVectorNumElements();
348 APInt DemandedElts = APInt::getAllOnes(NumElts);
349 return SimplifyDemandedVectorElts(Op, DemandedElts);
352 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353 const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
355 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356 bool AssumeSingleUse = false);
358 bool CombineToPreIndexedLoadStore(SDNode *N);
359 bool CombineToPostIndexedLoadStore(SDNode *N);
360 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361 bool SliceUpLoad(SDNode *N);
363 // Scalars have size 0 to distinguish from singleton vectors.
364 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
368 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// load.
371 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373 /// \param EltNo index of the vector element to load.
374 /// \param OriginalLoad load that EVE came from to be replaced.
375 /// \returns EVE on success, SDValue() on failure.
376 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377 SDValue EltNo,
378 LoadSDNode *OriginalLoad);
379 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383 SDValue PromoteIntBinOp(SDValue Op);
384 SDValue PromoteIntShiftOp(SDValue Op);
385 SDValue PromoteExtend(SDValue Op);
386 bool PromoteLoad(SDValue Op);
388 /// Call the node-specific routine that knows how to fold each
389 /// particular type of node. If that doesn't do anything, try the
390 /// target-specific DAG combines.
391 SDValue combine(SDNode *N);
393 // Visitation implementation - Implement dag node combining for different
394 // node types. The semantics are as follows:
395 // Return Value:
396 // SDValue.getNode() == 0 - No change was made
397 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398 // otherwise - N should be replaced by the returned Operand.
400 SDValue visitTokenFactor(SDNode *N);
401 SDValue visitMERGE_VALUES(SDNode *N);
402 SDValue visitADD(SDNode *N);
403 SDValue visitADDLike(SDNode *N);
404 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405 SDValue visitSUB(SDNode *N);
406 SDValue visitADDSAT(SDNode *N);
407 SDValue visitSUBSAT(SDNode *N);
408 SDValue visitADDC(SDNode *N);
409 SDValue visitADDO(SDNode *N);
410 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411 SDValue visitSUBC(SDNode *N);
412 SDValue visitSUBO(SDNode *N);
413 SDValue visitADDE(SDNode *N);
414 SDValue visitADDCARRY(SDNode *N);
415 SDValue visitSADDO_CARRY(SDNode *N);
416 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417 SDValue visitSUBE(SDNode *N);
418 SDValue visitSUBCARRY(SDNode *N);
419 SDValue visitSSUBO_CARRY(SDNode *N);
420 SDValue visitMUL(SDNode *N);
421 SDValue visitMULFIX(SDNode *N);
422 SDValue useDivRem(SDNode *N);
423 SDValue visitSDIV(SDNode *N);
424 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425 SDValue visitUDIV(SDNode *N);
426 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427 SDValue visitREM(SDNode *N);
428 SDValue visitMULHU(SDNode *N);
429 SDValue visitMULHS(SDNode *N);
430 SDValue visitSMUL_LOHI(SDNode *N);
431 SDValue visitUMUL_LOHI(SDNode *N);
432 SDValue visitMULO(SDNode *N);
433 SDValue visitIMINMAX(SDNode *N);
434 SDValue visitAND(SDNode *N);
435 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitOR(SDNode *N);
437 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitXOR(SDNode *N);
439 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
440 SDValue visitSHL(SDNode *N);
441 SDValue visitSRA(SDNode *N);
442 SDValue visitSRL(SDNode *N);
443 SDValue visitFunnelShift(SDNode *N);
444 SDValue visitRotate(SDNode *N);
445 SDValue visitABS(SDNode *N);
446 SDValue visitBSWAP(SDNode *N);
447 SDValue visitBITREVERSE(SDNode *N);
448 SDValue visitCTLZ(SDNode *N);
449 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450 SDValue visitCTTZ(SDNode *N);
451 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452 SDValue visitCTPOP(SDNode *N);
453 SDValue visitSELECT(SDNode *N);
454 SDValue visitVSELECT(SDNode *N);
455 SDValue visitSELECT_CC(SDNode *N);
456 SDValue visitSETCC(SDNode *N);
457 SDValue visitSETCCCARRY(SDNode *N);
458 SDValue visitSIGN_EXTEND(SDNode *N);
459 SDValue visitZERO_EXTEND(SDNode *N);
460 SDValue visitANY_EXTEND(SDNode *N);
461 SDValue visitAssertExt(SDNode *N);
462 SDValue visitAssertAlign(SDNode *N);
463 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
465 SDValue visitTRUNCATE(SDNode *N);
466 SDValue visitBITCAST(SDNode *N);
467 SDValue visitFREEZE(SDNode *N);
468 SDValue visitBUILD_PAIR(SDNode *N);
469 SDValue visitFADD(SDNode *N);
470 SDValue visitSTRICT_FADD(SDNode *N);
471 SDValue visitFSUB(SDNode *N);
472 SDValue visitFMUL(SDNode *N);
473 SDValue visitFMA(SDNode *N);
474 SDValue visitFDIV(SDNode *N);
475 SDValue visitFREM(SDNode *N);
476 SDValue visitFSQRT(SDNode *N);
477 SDValue visitFCOPYSIGN(SDNode *N);
478 SDValue visitFPOW(SDNode *N);
479 SDValue visitSINT_TO_FP(SDNode *N);
480 SDValue visitUINT_TO_FP(SDNode *N);
481 SDValue visitFP_TO_SINT(SDNode *N);
482 SDValue visitFP_TO_UINT(SDNode *N);
483 SDValue visitFP_ROUND(SDNode *N);
484 SDValue visitFP_EXTEND(SDNode *N);
485 SDValue visitFNEG(SDNode *N);
486 SDValue visitFABS(SDNode *N);
487 SDValue visitFCEIL(SDNode *N);
488 SDValue visitFTRUNC(SDNode *N);
489 SDValue visitFFLOOR(SDNode *N);
490 SDValue visitFMinMax(SDNode *N);
491 SDValue visitBRCOND(SDNode *N);
492 SDValue visitBR_CC(SDNode *N);
493 SDValue visitLOAD(SDNode *N);
495 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
496 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
498 SDValue visitSTORE(SDNode *N);
499 SDValue visitLIFETIME_END(SDNode *N);
500 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
501 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
502 SDValue visitBUILD_VECTOR(SDNode *N);
503 SDValue visitCONCAT_VECTORS(SDNode *N);
504 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
505 SDValue visitVECTOR_SHUFFLE(SDNode *N);
506 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
507 SDValue visitINSERT_SUBVECTOR(SDNode *N);
508 SDValue visitMLOAD(SDNode *N);
509 SDValue visitMSTORE(SDNode *N);
510 SDValue visitMGATHER(SDNode *N);
511 SDValue visitMSCATTER(SDNode *N);
512 SDValue visitFP_TO_FP16(SDNode *N);
513 SDValue visitFP16_TO_FP(SDNode *N);
514 SDValue visitVECREDUCE(SDNode *N);
515 SDValue visitVPOp(SDNode *N);
517 SDValue visitFADDForFMACombine(SDNode *N);
518 SDValue visitFSUBForFMACombine(SDNode *N);
519 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
521 SDValue XformToShuffleWithZero(SDNode *N);
522 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
523 const SDLoc &DL, SDValue N0,
524 SDValue N1);
525 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
526 SDValue N1);
527 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
528 SDValue N1, SDNodeFlags Flags);
530 SDValue visitShiftByConstant(SDNode *N);
532 SDValue foldSelectOfConstants(SDNode *N);
533 SDValue foldVSelectOfConstants(SDNode *N);
534 SDValue foldBinOpIntoSelect(SDNode *BO);
535 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
536 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
537 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
538 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
539 SDValue N2, SDValue N3, ISD::CondCode CC,
540 bool NotExtCompare = false);
541 SDValue convertSelectOfFPConstantsToLoadOffset(
542 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
543 ISD::CondCode CC);
544 SDValue foldSignChangeInBitcast(SDNode *N);
545 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
546 SDValue N2, SDValue N3, ISD::CondCode CC);
547 SDValue foldSelectOfBinops(SDNode *N);
548 SDValue foldSextSetcc(SDNode *N);
549 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
550 const SDLoc &DL);
551 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
552 SDValue unfoldMaskedMerge(SDNode *N);
553 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
554 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
555 const SDLoc &DL, bool foldBooleans);
556 SDValue rebuildSetCC(SDValue N);
558 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
559 SDValue &CC, bool MatchStrict = false) const;
560 bool isOneUseSetCC(SDValue N) const;
562 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
563 unsigned HiOp);
564 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
565 SDValue CombineExtLoad(SDNode *N);
566 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
567 SDValue combineRepeatedFPDivisors(SDNode *N);
568 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
569 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
570 SDValue BuildSDIV(SDNode *N);
571 SDValue BuildSDIVPow2(SDNode *N);
572 SDValue BuildUDIV(SDNode *N);
573 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
574 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
575 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
576 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
577 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
578 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
579 SDNodeFlags Flags, bool Reciprocal);
580 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
581 SDNodeFlags Flags, bool Reciprocal);
582 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
583 bool DemandHighBits = true);
584 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
585 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
586 SDValue InnerPos, SDValue InnerNeg,
587 unsigned PosOpcode, unsigned NegOpcode,
588 const SDLoc &DL);
589 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
590 SDValue InnerPos, SDValue InnerNeg,
591 unsigned PosOpcode, unsigned NegOpcode,
592 const SDLoc &DL);
593 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
594 SDValue MatchLoadCombine(SDNode *N);
595 SDValue mergeTruncStores(StoreSDNode *N);
596 SDValue reduceLoadWidth(SDNode *N);
597 SDValue ReduceLoadOpStoreWidth(SDNode *N);
598 SDValue splitMergedValStore(StoreSDNode *ST);
599 SDValue TransformFPLoadStorePair(SDNode *N);
600 SDValue convertBuildVecZextToZext(SDNode *N);
601 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
602 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
603 SDValue reduceBuildVecToShuffle(SDNode *N);
604 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
605 ArrayRef<int> VectorMask, SDValue VecIn1,
606 SDValue VecIn2, unsigned LeftIdx,
607 bool DidSplitVec);
608 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
610 /// Walk up chain skipping non-aliasing memory nodes,
611 /// looking for aliasing nodes and adding them to the Aliases vector.
612 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
613 SmallVectorImpl<SDValue> &Aliases);
615 /// Return true if there is any possibility that the two addresses overlap.
616 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
618 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
619 /// chain (aliasing node).
620 SDValue FindBetterChain(SDNode *N, SDValue Chain);
622 /// Try to replace a store and any possibly adjacent stores on
623 /// consecutive chains with better chains. Return true only if St is
624 /// replaced.
626 /// Notice that other chains may still be replaced even if the function
627 /// returns false.
628 bool findBetterNeighborChains(StoreSDNode *St);
630 // Helper for findBetterNeighborChains. Walk up the store chain, adding
631 // additional chained stores that do not overlap and can be parallelized.
632 bool parallelizeChainedStores(StoreSDNode *St);
634 /// Holds a pointer to an LSBaseSDNode as well as information on where it
635 /// is located in a sequence of memory operations connected by a chain.
636 struct MemOpLink {
637 // Ptr to the mem node.
638 LSBaseSDNode *MemNode;
640 // Offset from the base ptr.
641 int64_t OffsetFromBase;
643 MemOpLink(LSBaseSDNode *N, int64_t Offset)
644 : MemNode(N), OffsetFromBase(Offset) {}
647 // Classify the origin of a stored value.
648 enum class StoreSource { Unknown, Constant, Extract, Load };
649 StoreSource getStoreSource(SDValue StoreVal) {
650 switch (StoreVal.getOpcode()) {
651 case ISD::Constant:
652 case ISD::ConstantFP:
653 return StoreSource::Constant;
654 case ISD::EXTRACT_VECTOR_ELT:
655 case ISD::EXTRACT_SUBVECTOR:
656 return StoreSource::Extract;
657 case ISD::LOAD:
658 return StoreSource::Load;
659 default:
660 return StoreSource::Unknown;
664 /// This is a helper function for visitMUL to check the profitability
665 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
666 /// MulNode is the original multiply, AddNode is (add x, c1),
667 /// and ConstNode is c2.
668 bool isMulAddWithConstProfitable(SDNode *MulNode,
669 SDValue &AddNode,
670 SDValue &ConstNode);
672 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
673 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
674 /// the type of the loaded value to be extended.
675 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
676 EVT LoadResultTy, EVT &ExtVT);
678 /// Helper function to calculate whether the given Load/Store can have its
679 /// width reduced to ExtVT.
680 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
681 EVT &MemVT, unsigned ShAmt = 0);
683 /// Used by BackwardsPropagateMask to find suitable loads.
684 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
685 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
686 ConstantSDNode *Mask, SDNode *&NodeToMask);
687 /// Attempt to propagate a given AND node back to load leaves so that they
688 /// can be combined into narrow loads.
689 bool BackwardsPropagateMask(SDNode *N);
691 /// Helper function for mergeConsecutiveStores which merges the component
692 /// store chains.
693 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
694 unsigned NumStores);
696 /// This is a helper function for mergeConsecutiveStores. When the source
697 /// elements of the consecutive stores are all constants or all extracted
698 /// vector elements, try to merge them into one larger store introducing
699 /// bitcasts if necessary. \return True if a merged store was created.
700 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
701 EVT MemVT, unsigned NumStores,
702 bool IsConstantSrc, bool UseVector,
703 bool UseTrunc);
705 /// This is a helper function for mergeConsecutiveStores. Stores that
706 /// potentially may be merged with St are placed in StoreNodes. RootNode is
707 /// a chain predecessor to all store candidates.
708 void getStoreMergeCandidates(StoreSDNode *St,
709 SmallVectorImpl<MemOpLink> &StoreNodes,
710 SDNode *&Root);
712 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
713 /// have indirect dependency through their operands. RootNode is the
714 /// predecessor to all stores calculated by getStoreMergeCandidates and is
715 /// used to prune the dependency check. \return True if safe to merge.
716 bool checkMergeStoreCandidatesForDependencies(
717 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
718 SDNode *RootNode);
720 /// This is a helper function for mergeConsecutiveStores. Given a list of
721 /// store candidates, find the first N that are consecutive in memory.
722 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
723 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
724 int64_t ElementSizeBytes) const;
726 /// This is a helper function for mergeConsecutiveStores. It is used for
727 /// store chains that are composed entirely of constant values.
728 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
729 unsigned NumConsecutiveStores,
730 EVT MemVT, SDNode *Root, bool AllowVectors);
732 /// This is a helper function for mergeConsecutiveStores. It is used for
733 /// store chains that are composed entirely of extracted vector elements.
734 /// When extracting multiple vector elements, try to store them in one
735 /// vector store rather than a sequence of scalar stores.
736 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
737 unsigned NumConsecutiveStores, EVT MemVT,
738 SDNode *Root);
740 /// This is a helper function for mergeConsecutiveStores. It is used for
741 /// store chains that are composed entirely of loaded values.
742 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
743 unsigned NumConsecutiveStores, EVT MemVT,
744 SDNode *Root, bool AllowVectors,
745 bool IsNonTemporalStore, bool IsNonTemporalLoad);
747 /// Merge consecutive store operations into a wide store.
748 /// This optimization uses wide integers or vectors when possible.
749 /// \return true if stores were merged.
750 bool mergeConsecutiveStores(StoreSDNode *St);
752 /// Try to transform a truncation where C is a constant:
753 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
755 /// \p N needs to be a truncation and its first operand an AND. Other
756 /// requirements are checked by the function (e.g. that trunc is
757 /// single-use); if they are not met, an empty SDValue is returned.
758 SDValue distributeTruncateThroughAnd(SDNode *N);
760 /// Helper function to determine whether the target supports the operation
761 /// given by \p Opcode for type \p VT, that is, whether the operation
762 /// is legal or custom before legalizing operations, and whether it is
763 /// legal (but not custom) after legalization.
764 bool hasOperation(unsigned Opcode, EVT VT) {
765 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
768 public:
769 /// Runs the dag combiner on all nodes in the work list
770 void Run(CombineLevel AtLevel);
772 SelectionDAG &getDAG() const { return DAG; }
774 /// Returns a type large enough to hold any valid shift amount - before type
775 /// legalization these can be huge.
776 EVT getShiftAmountTy(EVT LHSTy) {
777 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
778 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
781 /// This method returns true if we are running before type legalization or
782 /// if the specified VT is legal.
783 bool isTypeLegal(const EVT &VT) {
784 if (!LegalTypes) return true;
785 return TLI.isTypeLegal(VT);
788 /// Convenience wrapper around TargetLowering::getSetCCResultType
789 EVT getSetCCResultType(EVT VT) const {
790 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
793 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
794 SDValue OrigLoad, SDValue ExtLoad,
795 ISD::NodeType ExtType);
798 /// This class is a DAGUpdateListener that removes any deleted
799 /// nodes from the worklist.
800 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
801 DAGCombiner &DC;
803 public:
804 explicit WorklistRemover(DAGCombiner &dc)
805 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
807 void NodeDeleted(SDNode *N, SDNode *E) override {
808 DC.removeFromWorklist(N);
812 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
813 DAGCombiner &DC;
815 public:
816 explicit WorklistInserter(DAGCombiner &dc)
817 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
819 // FIXME: Ideally we could add N to the worklist, but this causes exponential
820 // compile time costs in large DAGs, e.g. Halide.
821 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
824 } // end anonymous namespace
826 //===----------------------------------------------------------------------===//
827 // TargetLowering::DAGCombinerInfo implementation
828 //===----------------------------------------------------------------------===//
830 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
831 ((DAGCombiner*)DC)->AddToWorklist(N);
834 SDValue TargetLowering::DAGCombinerInfo::
835 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
836 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
839 SDValue TargetLowering::DAGCombinerInfo::
840 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
841 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
844 SDValue TargetLowering::DAGCombinerInfo::
845 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
846 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
849 bool TargetLowering::DAGCombinerInfo::
850 recursivelyDeleteUnusedNodes(SDNode *N) {
851 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
854 void TargetLowering::DAGCombinerInfo::
855 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
856 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
859 //===----------------------------------------------------------------------===//
860 // Helper Functions
861 //===----------------------------------------------------------------------===//
863 void DAGCombiner::deleteAndRecombine(SDNode *N) {
864 removeFromWorklist(N);
866 // If the operands of this node are only used by the node, they will now be
867 // dead. Make sure to re-visit them and recursively delete dead nodes.
868 for (const SDValue &Op : N->ops())
869 // For an operand generating multiple values, one of the values may
870 // become dead allowing further simplification (e.g. split index
871 // arithmetic from an indexed load).
872 if (Op->hasOneUse() || Op->getNumValues() > 1)
873 AddToWorklist(Op.getNode());
875 DAG.DeleteNode(N);
878 // APInts must be the same size for most operations; this helper
879 // function zero-extends the shorter of the pair so that they match.
880 // We provide an Offset so that we can create bitwidths that won't overflow.
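// For example, an i8 LHS and an i16 RHS are both widened to 16 bits; with
// Offset == 1 they would be widened to 17 bits, leaving headroom for a
// following arithmetic step.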
881 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
882 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
883 LHS = LHS.zextOrSelf(Bits);
884 RHS = RHS.zextOrSelf(Bits);
887 // Return true if this node is a setcc, or is a select_cc
888 // that selects between the target values used for true and false, making it
889 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
890 // the appropriate nodes based on the type of node we are checking. This
891 // simplifies life a bit for the callers.
892 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
893 SDValue &CC, bool MatchStrict) const {
894 if (N.getOpcode() == ISD::SETCC) {
895 LHS = N.getOperand(0);
896 RHS = N.getOperand(1);
897 CC = N.getOperand(2);
898 return true;
901 if (MatchStrict &&
902 (N.getOpcode() == ISD::STRICT_FSETCC ||
903 N.getOpcode() == ISD::STRICT_FSETCCS)) {
904 LHS = N.getOperand(1);
905 RHS = N.getOperand(2);
906 CC = N.getOperand(3);
907 return true;
910 if (N.getOpcode() != ISD::SELECT_CC ||
911 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
912 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
913 return false;
915 if (TLI.getBooleanContents(N.getValueType()) ==
916 TargetLowering::UndefinedBooleanContent)
917 return false;
919 LHS = N.getOperand(0);
920 RHS = N.getOperand(1);
921 CC = N.getOperand(4);
922 return true;
925 /// Return true if this is a SetCC-equivalent operation with only one use.
926 /// If this is true, it allows the users to invert the operation for free when
927 /// it is profitable to do so.
928 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
929 SDValue N0, N1, N2;
930 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
931 return true;
932 return false;
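// Returns true if \p N is a constant splat whose value is exactly the
// all-ones mask for \p ScalarTy, e.g. a splat of 0xFFFF for MVT::i16.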
935 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
936 if (!ScalarTy.isSimple())
937 return false;
939 uint64_t MaskForTy = 0ULL;
940 switch (ScalarTy.getSimpleVT().SimpleTy) {
941 case MVT::i8:
942 MaskForTy = 0xFFULL;
943 break;
944 case MVT::i16:
945 MaskForTy = 0xFFFFULL;
946 break;
947 case MVT::i32:
948 MaskForTy = 0xFFFFFFFFULL;
949 break;
950 default:
951 return false;
952 break;
955 APInt Val;
956 if (ISD::isConstantSplatVector(N, Val))
957 return Val.getLimitedValue() == MaskForTy;
959 return false;
962 // Determines if it is a constant integer or a splat/build vector of constant
963 // integers (and undefs).
964 // Do not permit build vector implicit truncation.
965 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
966 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
967 return !(Const->isOpaque() && NoOpaques);
968 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
969 return false;
970 unsigned BitWidth = N.getScalarValueSizeInBits();
971 for (const SDValue &Op : N->op_values()) {
972 if (Op.isUndef())
973 continue;
974 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
975 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
976 (Const->isOpaque() && NoOpaques))
977 return false;
979 return true;
982 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
983 // with undefs.
984 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
985 if (V.getOpcode() != ISD::BUILD_VECTOR)
986 return false;
987 return isConstantOrConstantVector(V, NoOpaques) ||
988 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
991 // Determine if the indexing may be split from this indexed load, i.e. its
// index is not an opaque target constant.
992 static bool canSplitIdx(LoadSDNode *LD) {
993 return MaySplitLoadIndex &&
994 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
995 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
998 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
999 const SDLoc &DL,
1000 SDValue N0,
1001 SDValue N1) {
1002 // Currently this only tries to ensure we don't undo the GEP splits done by
1003 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1004 // we check if the following transformation would be problematic:
1005 // (load/store (add, (add, x, offset1), offset2)) ->
1006 // (load/store (add, x, offset1+offset2)).
1008 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1009 return false;
1011 if (N0.hasOneUse())
1012 return false;
1014 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1015 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1016 if (!C1 || !C2)
1017 return false;
1019 const APInt &C1APIntVal = C1->getAPIntValue();
1020 const APInt &C2APIntVal = C2->getAPIntValue();
1021 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1022 return false;
1024 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1025 if (CombinedValueIntVal.getBitWidth() > 64)
1026 return false;
1027 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1029 for (SDNode *Node : N0->uses()) {
1030 auto LoadStore = dyn_cast<MemSDNode>(Node);
1031 if (LoadStore) {
1032 // Is x[offset2] already not a legal addressing mode? If so then
1033 // reassociating the constants breaks nothing (we test offset2 because
1034 // that's the one we hope to fold into the load or store).
1035 TargetLoweringBase::AddrMode AM;
1036 AM.HasBaseReg = true;
1037 AM.BaseOffs = C2APIntVal.getSExtValue();
1038 EVT VT = LoadStore->getMemoryVT();
1039 unsigned AS = LoadStore->getAddressSpace();
1040 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1041 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1042 continue;
1044 // Would x[offset1+offset2] still be a legal addressing mode?
1045 AM.BaseOffs = CombinedValue;
1046 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1047 return true;
1051 return false;
1054 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1055 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1056 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1057 SDValue N0, SDValue N1) {
1058 EVT VT = N0.getValueType();
1060 if (N0.getOpcode() != Opc)
1061 return SDValue();
1063 SDValue N00 = N0.getOperand(0);
1064 SDValue N01 = N0.getOperand(1);
1066 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
1067 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
1068 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1069 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1070 return DAG.getNode(Opc, DL, VT, N00, OpNode);
1071 return SDValue();
1073 if (N0.hasOneUse()) {
1074 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075 // iff (op x, c1) has one use
1076 if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
1077 return DAG.getNode(Opc, DL, VT, OpNode, N01);
1078 return SDValue();
1081 return SDValue();
1084 // Try to reassociate commutative binops.
1085 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1086 SDValue N1, SDNodeFlags Flags) {
1087 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1089 // Floating-point reassociation is not allowed without loose FP math.
1090 if (N0.getValueType().isFloatingPoint() ||
1091 N1.getValueType().isFloatingPoint())
1092 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1093 return SDValue();
1095 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1096 return Combined;
1097 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1098 return Combined;
1099 return SDValue();
1102 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1103 bool AddTo) {
1104 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1105 ++NodesCombined;
1106 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1107 To[0].getNode()->dump(&DAG);
1108 dbgs() << " and " << NumTo - 1 << " other values\n");
1109 for (unsigned i = 0, e = NumTo; i != e; ++i)
1110 assert((!To[i].getNode() ||
1111 N->getValueType(i) == To[i].getValueType()) &&
1112 "Cannot combine value to value of different type!");
1114 WorklistRemover DeadNodes(*this);
1115 DAG.ReplaceAllUsesWith(N, To);
1116 if (AddTo) {
1117 // Push the new nodes and any users onto the worklist
1118 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1119 if (To[i].getNode()) {
1120 AddToWorklist(To[i].getNode());
1121 AddUsersToWorklist(To[i].getNode());
1126 // Finally, if the node is now dead, remove it from the graph. The node
1127 // may not be dead if the replacement process recursively simplified to
1128 // something else needing this node.
1129 if (N->use_empty())
1130 deleteAndRecombine(N);
1131 return SDValue(N, 0);
1134 void DAGCombiner::
1135 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1136 // Replace the old value with the new one.
1137 ++NodesCombined;
1138 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1139 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1140 dbgs() << '\n');
1142 // Replace all uses. If any nodes become isomorphic to other nodes and
1143 // are deleted, make sure to remove them from our worklist.
1144 WorklistRemover DeadNodes(*this);
1145 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1147 // Push the new node and any (possibly new) users onto the worklist.
1148 AddToWorklistWithUsers(TLO.New.getNode());
1150 // Finally, if the node is now dead, remove it from the graph. The node
1151 // may not be dead if the replacement process recursively simplified to
1152 // something else needing this node.
1153 if (TLO.Old.getNode()->use_empty())
1154 deleteAndRecombine(TLO.Old.getNode());
1157 /// Check the specified integer node value to see if it can be simplified or if
1158 /// things it uses can be simplified by bit propagation. If so, return true.
1159 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1160 const APInt &DemandedElts,
1161 bool AssumeSingleUse) {
1162 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163 KnownBits Known;
1164 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165 AssumeSingleUse))
1166 return false;
1168 // Revisit the node.
1169 AddToWorklist(Op.getNode());
1171 CommitTargetLoweringOpt(TLO);
1172 return true;
1175 /// Check the specified vector node value to see if it can be simplified or
1176 /// if things it uses can be simplified as it only uses some of the elements.
1177 /// If so, return true.
1178 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1179 const APInt &DemandedElts,
1180 bool AssumeSingleUse) {
1181 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1182 APInt KnownUndef, KnownZero;
1183 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1184 TLO, 0, AssumeSingleUse))
1185 return false;
1187 // Revisit the node.
1188 AddToWorklist(Op.getNode());
1190 CommitTargetLoweringOpt(TLO);
1191 return true;
1194 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1195 SDLoc DL(Load);
1196 EVT VT = Load->getValueType(0);
1197 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1199 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1200 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1201 WorklistRemover DeadNodes(*this);
1202 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1204 deleteAndRecombine(Load);
1205 AddToWorklist(Trunc.getNode());
1208 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1209 Replace = false;
1210 SDLoc DL(Op);
1211 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212 LoadSDNode *LD = cast<LoadSDNode>(Op);
1213 EVT MemVT = LD->getMemoryVT();
1214 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1215 : LD->getExtensionType();
1216 Replace = true;
1217 return DAG.getExtLoad(ExtType, DL, PVT,
1218 LD->getChain(), LD->getBasePtr(),
1219 MemVT, LD->getMemOperand());
1222 unsigned Opc = Op.getOpcode();
1223 switch (Opc) {
1224 default: break;
1225 case ISD::AssertSext:
1226 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1227 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1228 break;
1229 case ISD::AssertZext:
1230 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1231 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1232 break;
1233 case ISD::Constant: {
1234 unsigned ExtOpc =
1235 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1236 return DAG.getNode(ExtOpc, DL, PVT, Op);
1240 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1241 return SDValue();
1242 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1245 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1246 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1247 return SDValue();
1248 EVT OldVT = Op.getValueType();
1249 SDLoc DL(Op);
1250 bool Replace = false;
1251 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1252 if (!NewOp.getNode())
1253 return SDValue();
1254 AddToWorklist(NewOp.getNode());
1256 if (Replace)
1257 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1258 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1259 DAG.getValueType(OldVT));
1262 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1263 EVT OldVT = Op.getValueType();
1264 SDLoc DL(Op);
1265 bool Replace = false;
1266 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1267 if (!NewOp.getNode())
1268 return SDValue();
1269 AddToWorklist(NewOp.getNode());
1271 if (Replace)
1272 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1273 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1276 /// Promote the specified integer binary operation if the target indicates it is
1277 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1278 /// i32 since i16 instructions are longer.
1279 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1280 if (!LegalOperations)
1281 return SDValue();
1283 EVT VT = Op.getValueType();
1284 if (VT.isVector() || !VT.isInteger())
1285 return SDValue();
1287 // If operation type is 'undesirable', e.g. i16 on x86, consider
1288 // promoting it.
1289 unsigned Opc = Op.getOpcode();
1290 if (TLI.isTypeDesirableForOp(Opc, VT))
1291 return SDValue();
1293 EVT PVT = VT;
1294 // Consult target whether it is a good idea to promote this operation and
1295 // what's the right type to promote it to.
1296 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297 assert(PVT != VT && "Don't know what type to promote to!");
1299 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1301 bool Replace0 = false;
1302 SDValue N0 = Op.getOperand(0);
1303 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1305 bool Replace1 = false;
1306 SDValue N1 = Op.getOperand(1);
1307 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1308 SDLoc DL(Op);
1310 SDValue RV =
1311 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1313 // We are always replacing N0/N1's use in N and only need additional
1314 // replacements if there are additional uses.
1315 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1316 // (SDValue) here because the node may reference multiple values
1317 // (for example, the chain value of a load node).
1318 Replace0 &= !N0->hasOneUse();
1319 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1321 // Combine Op here so it is preserved past replacements.
1322 CombineTo(Op.getNode(), RV);
1324 // If operands have a use ordering, make sure we deal with
1325 // predecessor first.
1326 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1327 std::swap(N0, N1);
1328 std::swap(NN0, NN1);
1331 if (Replace0) {
1332 AddToWorklist(NN0.getNode());
1333 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1335 if (Replace1) {
1336 AddToWorklist(NN1.getNode());
1337 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1339 return Op;
1341 return SDValue();
1344 /// Promote the specified integer shift operation if the target indicates it is
1345 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1346 /// i32 since i16 instructions are longer.
1347 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1348 if (!LegalOperations)
1349 return SDValue();
1351 EVT VT = Op.getValueType();
1352 if (VT.isVector() || !VT.isInteger())
1353 return SDValue();
1355 // If operation type is 'undesirable', e.g. i16 on x86, consider
1356 // promoting it.
1357 unsigned Opc = Op.getOpcode();
1358 if (TLI.isTypeDesirableForOp(Opc, VT))
1359 return SDValue();
1361 EVT PVT = VT;
1362 // Consult target whether it is a good idea to promote this operation and
1363 // what's the right type to promote it to.
1364 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1365 assert(PVT != VT && "Don't know what type to promote to!");
1367 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1369 bool Replace = false;
1370 SDValue N0 = Op.getOperand(0);
1371 SDValue N1 = Op.getOperand(1);
1372 if (Opc == ISD::SRA)
1373 N0 = SExtPromoteOperand(N0, PVT);
1374 else if (Opc == ISD::SRL)
1375 N0 = ZExtPromoteOperand(N0, PVT);
1376 else
1377 N0 = PromoteOperand(N0, PVT, Replace);
1379 if (!N0.getNode())
1380 return SDValue();
1382 SDLoc DL(Op);
1383 SDValue RV =
1384 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1386 if (Replace)
1387 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1389 // Deal with Op being deleted.
1390 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1391 return RV;
1393 return SDValue();
1396 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1397 if (!LegalOperations)
1398 return SDValue();
1400 EVT VT = Op.getValueType();
1401 if (VT.isVector() || !VT.isInteger())
1402 return SDValue();
1404 // If operation type is 'undesirable', e.g. i16 on x86, consider
1405 // promoting it.
1406 unsigned Opc = Op.getOpcode();
1407 if (TLI.isTypeDesirableForOp(Opc, VT))
1408 return SDValue();
1410 EVT PVT = VT;
1411 // Consult target whether it is a good idea to promote this operation and
1412 // what's the right type to promote it to.
1413 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1414 assert(PVT != VT && "Don't know what type to promote to!");
1415 // fold (aext (aext x)) -> (aext x)
1416 // fold (aext (zext x)) -> (zext x)
1417 // fold (aext (sext x)) -> (sext x)
1418 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1419 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1421 return SDValue();
1424 bool DAGCombiner::PromoteLoad(SDValue Op) {
1425 if (!LegalOperations)
1426 return false;
1428 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1429 return false;
1431 EVT VT = Op.getValueType();
1432 if (VT.isVector() || !VT.isInteger())
1433 return false;
1435 // If operation type is 'undesirable', e.g. i16 on x86, consider
1436 // promoting it.
1437 unsigned Opc = Op.getOpcode();
1438 if (TLI.isTypeDesirableForOp(Opc, VT))
1439 return false;
1441 EVT PVT = VT;
1442 // Consult target whether it is a good idea to promote this operation and
1443 // what's the right type to promote it to.
1444 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1445 assert(PVT != VT && "Don't know what type to promote to!");
1447 SDLoc DL(Op);
1448 SDNode *N = Op.getNode();
1449 LoadSDNode *LD = cast<LoadSDNode>(N);
1450 EVT MemVT = LD->getMemoryVT();
1451 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1452 : LD->getExtensionType();
1453 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1454 LD->getChain(), LD->getBasePtr(),
1455 MemVT, LD->getMemOperand());
1456 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1458 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1459 Result.getNode()->dump(&DAG); dbgs() << '\n');
1460 WorklistRemover DeadNodes(*this);
1461 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1462 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1463 deleteAndRecombine(N);
1464 AddToWorklist(Result.getNode());
1465 return true;
1467 return false;
1470 /// Recursively delete a node which has no uses and any operands for
1471 /// which it is the only use.
1473 /// Note that this both deletes the nodes and removes them from the worklist.
1474 /// It also adds any nodes that have had a user deleted to the worklist, as
1475 /// they may now have only one use and be subject to other combines.
1476 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1477 if (!N->use_empty())
1478 return false;
1480 SmallSetVector<SDNode *, 16> Nodes;
1481 Nodes.insert(N);
1482 do {
1483 N = Nodes.pop_back_val();
1484 if (!N)
1485 continue;
1487 if (N->use_empty()) {
1488 for (const SDValue &ChildN : N->op_values())
1489 Nodes.insert(ChildN.getNode());
1491 removeFromWorklist(N);
1492 DAG.DeleteNode(N);
1493 } else {
1494 AddToWorklist(N);
1496 } while (!Nodes.empty());
1497 return true;
1500 //===----------------------------------------------------------------------===//
1501 // Main DAG Combiner implementation
1502 //===----------------------------------------------------------------------===//
1504 void DAGCombiner::Run(CombineLevel AtLevel) {
1505 // Set the instance variables so that the various visit routines may use them.
1506 Level = AtLevel;
1507 LegalDAG = Level >= AfterLegalizeDAG;
1508 LegalOperations = Level >= AfterLegalizeVectorOps;
1509 LegalTypes = Level >= AfterLegalizeTypes;
1511 WorklistInserter AddNodes(*this);
1513 // Add all the dag nodes to the worklist.
1514 for (SDNode &Node : DAG.allnodes())
1515 AddToWorklist(&Node);
1517 // Create a dummy node (which is not added to allnodes), that adds a reference
1518 // to the root node, preventing it from being deleted, and tracking any
1519 // changes of the root.
1520 HandleSDNode Dummy(DAG.getRoot());
1522 // While we have a valid worklist entry node, try to combine it.
1523 while (SDNode *N = getNextWorklistEntry()) {
1524 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1525 // N is deleted from the DAG, since they too may now be dead or may have a
1526 // reduced number of uses, allowing other xforms.
1527 if (recursivelyDeleteUnusedNodes(N))
1528 continue;
1530 WorklistRemover DeadNodes(*this);
1532 // If this combine is running after legalizing the DAG, re-legalize any
1533 // nodes pulled off the worklist.
1534 if (LegalDAG) {
1535 SmallSetVector<SDNode *, 16> UpdatedNodes;
1536 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1538 for (SDNode *LN : UpdatedNodes)
1539 AddToWorklistWithUsers(LN);
1541 if (!NIsValid)
1542 continue;
1545 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1547 // Add any operands of the new node which have not yet been combined to the
1548 // worklist as well. Because the worklist uniques things already, this
1549 // won't repeatedly process the same operand.
1550 CombinedNodes.insert(N);
1551 for (const SDValue &ChildN : N->op_values())
1552 if (!CombinedNodes.count(ChildN.getNode()))
1553 AddToWorklist(ChildN.getNode());
1555 SDValue RV = combine(N);
1557 if (!RV.getNode())
1558 continue;
1560 ++NodesCombined;
1562 // If we get back the same node we passed in, rather than a new node or
1563 // zero, we know that the node must have defined multiple values and
1564 // CombineTo was used. Since CombineTo takes care of the worklist
1565 // mechanics for us, we have no work to do in this case.
1566 if (RV.getNode() == N)
1567 continue;
1569 assert(N->getOpcode() != ISD::DELETED_NODE &&
1570 RV.getOpcode() != ISD::DELETED_NODE &&
1571 "Node was deleted but visit returned new node!");
1573 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1575 if (N->getNumValues() == RV.getNode()->getNumValues())
1576 DAG.ReplaceAllUsesWith(N, RV.getNode());
1577 else {
1578 assert(N->getValueType(0) == RV.getValueType() &&
1579 N->getNumValues() == 1 && "Type mismatch");
1580 DAG.ReplaceAllUsesWith(N, &RV);
1583 // Push the new node and any users onto the worklist. Omit this if the
1584 // new node is the EntryToken (e.g. if a store managed to get optimized
1585 // out), because re-visiting the EntryToken and its users will not uncover
1586 // any additional opportunities, but there may be a large number of such
1587 // users, potentially causing compile time explosion.
1588 if (RV.getOpcode() != ISD::EntryToken) {
1589 AddToWorklist(RV.getNode());
1590 AddUsersToWorklist(RV.getNode());
1593 // Finally, if the node is now dead, remove it from the graph. The node
1594 // may not be dead if the replacement process recursively simplified to
1595 // something else needing this node. This will also take care of adding any
1596 // operands which have lost a user to the worklist.
1597 recursivelyDeleteUnusedNodes(N);
1600 // If the root changed (e.g. it was a dead load), update the root.
1601 DAG.setRoot(Dummy.getValue());
1602 DAG.RemoveDeadNodes();
1605 SDValue DAGCombiner::visit(SDNode *N) {
1606 switch (N->getOpcode()) {
1607 default: break;
1608 case ISD::TokenFactor: return visitTokenFactor(N);
1609 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1610 case ISD::ADD: return visitADD(N);
1611 case ISD::SUB: return visitSUB(N);
1612 case ISD::SADDSAT:
1613 case ISD::UADDSAT: return visitADDSAT(N);
1614 case ISD::SSUBSAT:
1615 case ISD::USUBSAT: return visitSUBSAT(N);
1616 case ISD::ADDC: return visitADDC(N);
1617 case ISD::SADDO:
1618 case ISD::UADDO: return visitADDO(N);
1619 case ISD::SUBC: return visitSUBC(N);
1620 case ISD::SSUBO:
1621 case ISD::USUBO: return visitSUBO(N);
1622 case ISD::ADDE: return visitADDE(N);
1623 case ISD::ADDCARRY: return visitADDCARRY(N);
1624 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1625 case ISD::SUBE: return visitSUBE(N);
1626 case ISD::SUBCARRY: return visitSUBCARRY(N);
1627 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1628 case ISD::SMULFIX:
1629 case ISD::SMULFIXSAT:
1630 case ISD::UMULFIX:
1631 case ISD::UMULFIXSAT: return visitMULFIX(N);
1632 case ISD::MUL: return visitMUL(N);
1633 case ISD::SDIV: return visitSDIV(N);
1634 case ISD::UDIV: return visitUDIV(N);
1635 case ISD::SREM:
1636 case ISD::UREM: return visitREM(N);
1637 case ISD::MULHU: return visitMULHU(N);
1638 case ISD::MULHS: return visitMULHS(N);
1639 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1640 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1641 case ISD::SMULO:
1642 case ISD::UMULO: return visitMULO(N);
1643 case ISD::SMIN:
1644 case ISD::SMAX:
1645 case ISD::UMIN:
1646 case ISD::UMAX: return visitIMINMAX(N);
1647 case ISD::AND: return visitAND(N);
1648 case ISD::OR: return visitOR(N);
1649 case ISD::XOR: return visitXOR(N);
1650 case ISD::SHL: return visitSHL(N);
1651 case ISD::SRA: return visitSRA(N);
1652 case ISD::SRL: return visitSRL(N);
1653 case ISD::ROTR:
1654 case ISD::ROTL: return visitRotate(N);
1655 case ISD::FSHL:
1656 case ISD::FSHR: return visitFunnelShift(N);
1657 case ISD::ABS: return visitABS(N);
1658 case ISD::BSWAP: return visitBSWAP(N);
1659 case ISD::BITREVERSE: return visitBITREVERSE(N);
1660 case ISD::CTLZ: return visitCTLZ(N);
1661 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1662 case ISD::CTTZ: return visitCTTZ(N);
1663 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1664 case ISD::CTPOP: return visitCTPOP(N);
1665 case ISD::SELECT: return visitSELECT(N);
1666 case ISD::VSELECT: return visitVSELECT(N);
1667 case ISD::SELECT_CC: return visitSELECT_CC(N);
1668 case ISD::SETCC: return visitSETCC(N);
1669 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1670 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1671 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1672 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1673 case ISD::AssertSext:
1674 case ISD::AssertZext: return visitAssertExt(N);
1675 case ISD::AssertAlign: return visitAssertAlign(N);
1676 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1677 case ISD::SIGN_EXTEND_VECTOR_INREG:
1678 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1679 case ISD::TRUNCATE: return visitTRUNCATE(N);
1680 case ISD::BITCAST: return visitBITCAST(N);
1681 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1682 case ISD::FADD: return visitFADD(N);
1683 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1684 case ISD::FSUB: return visitFSUB(N);
1685 case ISD::FMUL: return visitFMUL(N);
1686 case ISD::FMA: return visitFMA(N);
1687 case ISD::FDIV: return visitFDIV(N);
1688 case ISD::FREM: return visitFREM(N);
1689 case ISD::FSQRT: return visitFSQRT(N);
1690 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1691 case ISD::FPOW: return visitFPOW(N);
1692 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1693 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1694 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1695 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1696 case ISD::FP_ROUND: return visitFP_ROUND(N);
1697 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1698 case ISD::FNEG: return visitFNEG(N);
1699 case ISD::FABS: return visitFABS(N);
1700 case ISD::FFLOOR: return visitFFLOOR(N);
1701 case ISD::FMINNUM:
1702 case ISD::FMAXNUM:
1703 case ISD::FMINIMUM:
1704 case ISD::FMAXIMUM: return visitFMinMax(N);
1705 case ISD::FCEIL: return visitFCEIL(N);
1706 case ISD::FTRUNC: return visitFTRUNC(N);
1707 case ISD::BRCOND: return visitBRCOND(N);
1708 case ISD::BR_CC: return visitBR_CC(N);
1709 case ISD::LOAD: return visitLOAD(N);
1710 case ISD::STORE: return visitSTORE(N);
1711 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1712 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1713 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1714 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1715 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1716 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1717 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1718 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1719 case ISD::MGATHER: return visitMGATHER(N);
1720 case ISD::MLOAD: return visitMLOAD(N);
1721 case ISD::MSCATTER: return visitMSCATTER(N);
1722 case ISD::MSTORE: return visitMSTORE(N);
1723 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1724 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1725 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1726 case ISD::FREEZE: return visitFREEZE(N);
1727 case ISD::VECREDUCE_FADD:
1728 case ISD::VECREDUCE_FMUL:
1729 case ISD::VECREDUCE_ADD:
1730 case ISD::VECREDUCE_MUL:
1731 case ISD::VECREDUCE_AND:
1732 case ISD::VECREDUCE_OR:
1733 case ISD::VECREDUCE_XOR:
1734 case ISD::VECREDUCE_SMAX:
1735 case ISD::VECREDUCE_SMIN:
1736 case ISD::VECREDUCE_UMAX:
1737 case ISD::VECREDUCE_UMIN:
1738 case ISD::VECREDUCE_FMAX:
1739 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1740 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1741 #include "llvm/IR/VPIntrinsics.def"
1742 return visitVPOp(N);
1744 return SDValue();
1747 SDValue DAGCombiner::combine(SDNode *N) {
1748 SDValue RV;
1749 if (!DisableGenericCombines)
1750 RV = visit(N);
1752 // If nothing happened, try a target-specific DAG combine.
1753 if (!RV.getNode()) {
1754 assert(N->getOpcode() != ISD::DELETED_NODE &&
1755 "Node was deleted but visit returned NULL!");
1757 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1758 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1760 // Expose the DAG combiner to the target combiner impls.
1761 TargetLowering::DAGCombinerInfo
1762 DagCombineInfo(DAG, Level, false, this);
1764 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1768 // If still nothing happened, try promoting the operation.
1769 if (!RV.getNode()) {
1770 switch (N->getOpcode()) {
1771 default: break;
1772 case ISD::ADD:
1773 case ISD::SUB:
1774 case ISD::MUL:
1775 case ISD::AND:
1776 case ISD::OR:
1777 case ISD::XOR:
1778 RV = PromoteIntBinOp(SDValue(N, 0));
1779 break;
1780 case ISD::SHL:
1781 case ISD::SRA:
1782 case ISD::SRL:
1783 RV = PromoteIntShiftOp(SDValue(N, 0));
1784 break;
1785 case ISD::SIGN_EXTEND:
1786 case ISD::ZERO_EXTEND:
1787 case ISD::ANY_EXTEND:
1788 RV = PromoteExtend(SDValue(N, 0));
1789 break;
1790 case ISD::LOAD:
1791 if (PromoteLoad(SDValue(N, 0)))
1792 RV = SDValue(N, 0);
1793 break;
1797 // If N is a commutative binary node, try to eliminate it if the commuted
1798 // version is already present in the DAG.
1799 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1800 N->getNumValues() == 1) {
1801 SDValue N0 = N->getOperand(0);
1802 SDValue N1 = N->getOperand(1);
1804 // Constant operands are canonicalized to RHS.
1805 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1806 SDValue Ops[] = {N1, N0};
1807 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1808 N->getFlags());
1809 if (CSENode)
1810 return SDValue(CSENode, 0);
1814 return RV;
1817 /// Given a node, return its input chain if it has one, otherwise return a null
1818 /// sd operand.
1819 static SDValue getInputChainForNode(SDNode *N) {
1820 if (unsigned NumOps = N->getNumOperands()) {
1821 if (N->getOperand(0).getValueType() == MVT::Other)
1822 return N->getOperand(0);
1823 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1824 return N->getOperand(NumOps-1);
1825 for (unsigned i = 1; i < NumOps-1; ++i)
1826 if (N->getOperand(i).getValueType() == MVT::Other)
1827 return N->getOperand(i);
1829 return SDValue();
1832 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1833 // If N has two operands, where one has an input chain equal to the other,
1834 // the 'other' chain is redundant.
1835 if (N->getNumOperands() == 2) {
1836 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1837 return N->getOperand(0);
1838 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1839 return N->getOperand(1);
1842 // Don't simplify token factors if optnone.
1843 if (OptLevel == CodeGenOpt::None)
1844 return SDValue();
1846 // Don't simplify the token factor if the node itself has too many operands.
1847 if (N->getNumOperands() > TokenFactorInlineLimit)
1848 return SDValue();
1850 // If the sole user is a token factor, we should make sure we have a
1851 // chance to merge them together. This prevents TF chains from inhibiting
1852 // optimizations.
1853 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1854 AddToWorklist(*(N->use_begin()));
1856 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1857 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1858 SmallPtrSet<SDNode*, 16> SeenOps;
1859 bool Changed = false; // If we should replace this token factor.
1861 // Start out with this token factor.
1862 TFs.push_back(N);
1864 // Iterate through token factors. The TFs list grows when new token factors
1865 // are encountered.
1866 for (unsigned i = 0; i < TFs.size(); ++i) {
1867 // Limit number of nodes to inline, to avoid quadratic compile times.
1868 // We have to add the outstanding Token Factors to Ops, otherwise we might
1869 // drop Ops from the resulting Token Factors.
1870 if (Ops.size() > TokenFactorInlineLimit) {
1871 for (unsigned j = i; j < TFs.size(); j++)
1872 Ops.emplace_back(TFs[j], 0);
1873 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1874 // combiner worklist later.
1875 TFs.resize(i);
1876 break;
1879 SDNode *TF = TFs[i];
1880 // Check each of the operands.
1881 for (const SDValue &Op : TF->op_values()) {
1882 switch (Op.getOpcode()) {
1883 case ISD::EntryToken:
1884 // Entry tokens don't need to be added to the list. They are
1885 // redundant.
1886 Changed = true;
1887 break;
1889 case ISD::TokenFactor:
1890 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1891 // Queue up for processing.
1892 TFs.push_back(Op.getNode());
1893 Changed = true;
1894 break;
1896 LLVM_FALLTHROUGH;
1898 default:
1899 // Only add if it isn't already in the list.
1900 if (SeenOps.insert(Op.getNode()).second)
1901 Ops.push_back(Op);
1902 else
1903 Changed = true;
1904 break;
1909 // Re-visit inlined Token Factors, to clean them up in case they have been
1910 // removed. Skip the first Token Factor, as this is the current node.
1911 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1912 AddToWorklist(TFs[i]);
1914 // Remove nodes that are chained to another node in the list. Do so
1915 // by walking up chains breadth-first, stopping when we've seen
1916 // another operand. In general we must climb to the EntryNode, but we can exit
1917 // early if we find all remaining work is associated with just one operand as
1918 // no further pruning is possible.
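// For example, if Ops contains a store St and a load Ld and St's chain
// reaches Ld, then Ld is a redundant operand of the token factor: anything
// ordered after the token factor is already ordered after Ld through St.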
1920 // List of nodes to search through and original Ops from which they originate.
1921 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1922 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1923 SmallPtrSet<SDNode *, 16> SeenChains;
1924 bool DidPruneOps = false;
1926 unsigned NumLeftToConsider = 0;
1927 for (const SDValue &Op : Ops) {
1928 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1929 OpWorkCount.push_back(1);
1932 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1933 // If this is an Op, we can remove the op from the list. Re-mark any
1934 // search associated with it as being from the current OpNumber.
1935 if (SeenOps.contains(Op)) {
1936 Changed = true;
1937 DidPruneOps = true;
1938 unsigned OrigOpNumber = 0;
1939 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1940 OrigOpNumber++;
1941 assert((OrigOpNumber != Ops.size()) &&
1942 "expected to find TokenFactor Operand");
1943 // Re-mark worklist from OrigOpNumber to OpNumber
1944 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1945 if (Worklist[i].second == OrigOpNumber) {
1946 Worklist[i].second = OpNumber;
1949 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1950 OpWorkCount[OrigOpNumber] = 0;
1951 NumLeftToConsider--;
1953 // Add if it's a new chain
1954 if (SeenChains.insert(Op).second) {
1955 OpWorkCount[OpNumber]++;
1956 Worklist.push_back(std::make_pair(Op, OpNumber));
1960 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1961 // We need to consider at least 2 Ops to prune.
1962 if (NumLeftToConsider <= 1)
1963 break;
1964 auto CurNode = Worklist[i].first;
1965 auto CurOpNumber = Worklist[i].second;
1966 assert((OpWorkCount[CurOpNumber] > 0) &&
1967 "Node should not appear in worklist");
1968 switch (CurNode->getOpcode()) {
1969 case ISD::EntryToken:
1970 // Hitting EntryToken is the only way for the search to terminate without
1971 // hitting another operand's search. Prevent us from marking this operand
1972 // considered.
1974 NumLeftToConsider++;
1975 break;
1976 case ISD::TokenFactor:
1977 for (const SDValue &Op : CurNode->op_values())
1978 AddToWorklist(i, Op.getNode(), CurOpNumber);
1979 break;
1980 case ISD::LIFETIME_START:
1981 case ISD::LIFETIME_END:
1982 case ISD::CopyFromReg:
1983 case ISD::CopyToReg:
1984 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1985 break;
1986 default:
1987 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1988 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1989 break;
1991 OpWorkCount[CurOpNumber]--;
1992 if (OpWorkCount[CurOpNumber] == 0)
1993 NumLeftToConsider--;
1996 // If we've changed things around then replace token factor.
1997 if (Changed) {
1998 SDValue Result;
1999 if (Ops.empty()) {
2000 // The entry token is the only possible outcome.
2001 Result = DAG.getEntryNode();
2002 } else {
2003 if (DidPruneOps) {
2004 SmallVector<SDValue, 8> PrunedOps;
2006 for (const SDValue &Op : Ops) {
2007 if (SeenChains.count(Op.getNode()) == 0)
2008 PrunedOps.push_back(Op);
2010 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2011 } else {
2012 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2015 return Result;
2017 return SDValue();
2020 /// MERGE_VALUES can always be eliminated.
2021 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2022 WorklistRemover DeadNodes(*this);
2023 // Replacing results may cause a different MERGE_VALUES to suddenly
2024 // be CSE'd with N, and carry its uses with it. Iterate until no
2025 // uses remain, to ensure that the node can be safely deleted.
2026 // First add the users of this node to the work list so that they
2027 // can be tried again once they have new operands.
2028 AddUsersToWorklist(N);
2029 do {
2030 // Do as a single replacement to avoid rewalking use lists.
2031 SmallVector<SDValue, 8> Ops;
2032 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2033 Ops.push_back(N->getOperand(i));
2034 DAG.ReplaceAllUsesWith(N, Ops.data());
2035 } while (!N->use_empty());
2036 deleteAndRecombine(N);
2037 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2040 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2041 /// ConstantSDNode pointer else nullptr.
2042 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2043 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2044 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2047 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2048 /// and that N may be folded in the load / store addressing mode.
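// For example, if N is (add BasePtr, 16) and Use is a load with N as its base
// pointer, this asks the target whether a [reg + 16] addressing mode is legal
// for the loaded type, in which case the add can be folded into the access.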
2049 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2050 const TargetLowering &TLI) {
2051 EVT VT;
2052 unsigned AS;
2054 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2055 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2056 return false;
2057 VT = LD->getMemoryVT();
2058 AS = LD->getAddressSpace();
2059 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2060 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2061 return false;
2062 VT = ST->getMemoryVT();
2063 AS = ST->getAddressSpace();
2064 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2065 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2066 return false;
2067 VT = LD->getMemoryVT();
2068 AS = LD->getAddressSpace();
2069 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2070 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2071 return false;
2072 VT = ST->getMemoryVT();
2073 AS = ST->getAddressSpace();
2074 } else
2075 return false;
2077 TargetLowering::AddrMode AM;
2078 if (N->getOpcode() == ISD::ADD) {
2079 AM.HasBaseReg = true;
2080 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2081 if (Offset)
2082 // [reg +/- imm]
2083 AM.BaseOffs = Offset->getSExtValue();
2084 else
2085 // [reg +/- reg]
2086 AM.Scale = 1;
2087 } else if (N->getOpcode() == ISD::SUB) {
2088 AM.HasBaseReg = true;
2089 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2090 if (Offset)
2091 // [reg +/- imm]
2092 AM.BaseOffs = -Offset->getSExtValue();
2093 else
2094 // [reg +/- reg]
2095 AM.Scale = 1;
2096 } else
2097 return false;
2099 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2100 VT.getTypeForEVT(*DAG.getContext()), AS);
2103 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2104 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2105 "Unexpected binary operator");
2107 // Don't do this unless the old select is going away. We want to eliminate the
2108 // binary operator, not replace a binop with a select.
2109 // TODO: Handle ISD::SELECT_CC.
2110 unsigned SelOpNo = 0;
2111 SDValue Sel = BO->getOperand(0);
2112 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2113 SelOpNo = 1;
2114 Sel = BO->getOperand(1);
2117 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2118 return SDValue();
2120 SDValue CT = Sel.getOperand(1);
2121 if (!isConstantOrConstantVector(CT, true) &&
2122 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2123 return SDValue();
2125 SDValue CF = Sel.getOperand(2);
2126 if (!isConstantOrConstantVector(CF, true) &&
2127 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2128 return SDValue();
2130 // Bail out if any constants are opaque because we can't constant fold those.
2131 // The exception is "and" and "or" with either 0 or -1 in which case we can
2132 // propagate non constant operands into select. I.e.:
2133 // and (select Cond, 0, -1), X --> select Cond, 0, X
2134 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2135 auto BinOpcode = BO->getOpcode();
2136 bool CanFoldNonConst =
2137 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2138 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2139 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2141 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2142 if (!CanFoldNonConst &&
2143 !isConstantOrConstantVector(CBO, true) &&
2144 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2145 return SDValue();
2147 EVT VT = BO->getValueType(0);
2149 // We have a select-of-constants followed by a binary operator with a
2150 // constant. Eliminate the binop by pulling the constant math into the select.
2151 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
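// For instance, add (select Cond, 3, 7), 5 becomes select Cond, 8, 12, and
// the add disappears entirely.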
2152 SDLoc DL(Sel);
2153 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2154 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2155 if (!CanFoldNonConst && !NewCT.isUndef() &&
2156 !isConstantOrConstantVector(NewCT, true) &&
2157 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2158 return SDValue();
2160 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2161 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2162 if (!CanFoldNonConst && !NewCF.isUndef() &&
2163 !isConstantOrConstantVector(NewCF, true) &&
2164 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2165 return SDValue();
2167 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2168 SelectOp->setFlags(BO->getFlags());
2169 return SelectOp;
2172 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2173 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2174 "Expecting add or sub");
2176 // Match a constant operand and a zext operand for the math instruction:
2177 // add Z, C
2178 // sub C, Z
2179 bool IsAdd = N->getOpcode() == ISD::ADD;
2180 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2181 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2182 auto *CN = dyn_cast<ConstantSDNode>(C);
2183 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2184 return SDValue();
2186 // Match the zext operand as a setcc of a boolean.
2187 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2188 Z.getOperand(0).getValueType() != MVT::i1)
2189 return SDValue();
2191 // Match the compare as: setcc (X & 1), 0, eq.
2192 SDValue SetCC = Z.getOperand(0);
2193 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2194 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2195 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2196 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2197 return SDValue();
2199 // We are adding/subtracting a constant and an inverted low bit. Turn that
2200 // into a subtract/add of the low bit with incremented/decremented constant:
2201 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2202 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
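// Worked example for the add form with C = 5: when (X & 1) == 0 the setcc is
// true, the zext is 1 and the result is 6; the rewrite computes 6 - 0 = 6.
// When (X & 1) == 1 the zext is 0 and the result is 5; the rewrite gives
// 6 - 1 = 5.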
2203 EVT VT = C.getValueType();
2204 SDLoc DL(N);
2205 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2206 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2207 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2208 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2211 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2212 /// a shift and add with a different constant.
2213 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2214 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2215 "Expecting add or sub");
2217 // We need a constant operand for the add/sub, and the other operand is a
2218 // logical shift right: add (srl), C or sub C, (srl).
2219 bool IsAdd = N->getOpcode() == ISD::ADD;
2220 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2221 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2222 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2223 ShiftOp.getOpcode() != ISD::SRL)
2224 return SDValue();
2226 // The shift must be of a 'not' value.
2227 SDValue Not = ShiftOp.getOperand(0);
2228 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2229 return SDValue();
2231 // The shift must be moving the sign bit to the least-significant-bit.
2232 EVT VT = ShiftOp.getValueType();
2233 SDValue ShAmt = ShiftOp.getOperand(1);
2234 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2235 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2236 return SDValue();
2238 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2239 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2240 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
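// Why the add form works: srl (not X), 31 is 1 when X is non-negative and 0
// when X is negative, so the original produces C+1 or C. sra X, 31 is 0 or -1
// for the same two cases, so (sra X, 31) + (C + 1) yields the same values.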
2241 SDLoc DL(N);
2242 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2243 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2244 if (SDValue NewC =
2245 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2246 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2247 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2248 return SDValue();
2251 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2252 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2253 /// are no common bits set in the operands).
2254 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2255 SDValue N0 = N->getOperand(0);
2256 SDValue N1 = N->getOperand(1);
2257 EVT VT = N0.getValueType();
2258 SDLoc DL(N);
2260 // fold (add x, undef) -> undef
2261 if (N0.isUndef())
2262 return N0;
2263 if (N1.isUndef())
2264 return N1;
2266 // fold (add c1, c2) -> c1+c2
2267 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2268 return C;
2270 // canonicalize constant to RHS
2271 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2272 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2273 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2275 // fold vector ops
2276 if (VT.isVector()) {
2277 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2278 return FoldedVOp;
2280 // fold (add x, 0) -> x, vector edition
2281 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2282 return N0;
2285 // fold (add x, 0) -> x
2286 if (isNullConstant(N1))
2287 return N0;
2289 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290 // fold ((A-c1)+c2) -> (A+(c2-c1))
2291 if (N0.getOpcode() == ISD::SUB &&
2292 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293 SDValue Sub =
2294 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295 assert(Sub && "Constant folding failed");
2296 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2299 // fold ((c1-A)+c2) -> (c1+c2)-A
2300 if (N0.getOpcode() == ISD::SUB &&
2301 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302 SDValue Add =
2303 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304 assert(Add && "Constant folding failed");
2305 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2308 // add (sext i1 X), 1 -> zext (not i1 X)
2309 // We don't transform this pattern:
2310 // add (zext i1 X), -1 -> sext (not i1 X)
2311 // because most (?) targets generate better code for the zext form.
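// This is sound because sext i1 X is 0 or -1; adding 1 yields 1 or 0, which
// is exactly zext (not X): 1 when X is false, 0 when X is true.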
2312 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313 isOneOrOneSplat(N1)) {
2314 SDValue X = N0.getOperand(0);
2315 if ((!LegalOperations ||
2316 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318 X.getScalarValueSizeInBits() == 1) {
2319 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2324 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325 // equivalent to (add x, c0).
2326 if (N0.getOpcode() == ISD::OR &&
2327 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330 {N1, N0.getOperand(1)}))
2331 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2335 if (SDValue NewSel = foldBinOpIntoSelect(N))
2336 return NewSel;
2338 // reassociate add
2339 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341 return RADD;
2343 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344 // equivalent to (add x, c).
2345 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346 if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349 return DAG.getNode(ISD::ADD, DL, VT,
2350 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351 N0.getOperand(1));
2353 return SDValue();
2355 if (SDValue Add = ReassociateAddOr(N0, N1))
2356 return Add;
2357 if (SDValue Add = ReassociateAddOr(N1, N0))
2358 return Add;
2360 // fold ((0-A) + B) -> B-A
2361 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2364 // fold (A + (0-B)) -> A-B
2365 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2368 // fold (A+(B-A)) -> B
2369 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370 return N1.getOperand(0);
2372 // fold ((B-A)+A) -> B
2373 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374 return N0.getOperand(0);
2376 // fold ((A-B)+(C-A)) -> (C-B)
2377 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378 N0.getOperand(0) == N1.getOperand(1))
2379 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380 N0.getOperand(1));
2382 // fold ((A-B)+(B-C)) -> (A-C)
2383 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384 N0.getOperand(1) == N1.getOperand(0))
2385 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386 N1.getOperand(1));
2388 // fold (A+(B-(A+C))) to (B-C)
2389 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390 N0 == N1.getOperand(1).getOperand(0))
2391 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392 N1.getOperand(1).getOperand(1));
2394 // fold (A+(B-(C+A))) to (B-C)
2395 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396 N0 == N1.getOperand(1).getOperand(1))
2397 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398 N1.getOperand(1).getOperand(0));
2400 // fold (A+((B-A)+or-C)) to (B+or-C)
2401 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402 N1.getOperand(0).getOpcode() == ISD::SUB &&
2403 N0 == N1.getOperand(0).getOperand(1))
2404 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405 N1.getOperand(1));
2407 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409 SDValue N00 = N0.getOperand(0);
2410 SDValue N01 = N0.getOperand(1);
2411 SDValue N10 = N1.getOperand(0);
2412 SDValue N11 = N1.getOperand(1);
2414 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2415 return DAG.getNode(ISD::SUB, DL, VT,
2416 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2420 // fold (add (umax X, C), -C) --> (usubsat X, C)
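// This is a saturating subtract in disguise: umax(X, C) - C is X - C when
// X >= C and 0 otherwise, which is precisely usubsat(X, C).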
2421 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423 return (!Max && !Op) ||
2424 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2426 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427 /*AllowUndefs*/ true))
2428 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429 N0.getOperand(1));
2432 if (SimplifyDemandedBits(SDValue(N, 0)))
2433 return SDValue(N, 0);
2435 if (isOneOrOneSplat(N1)) {
2436 // fold (add (xor a, -1), 1) -> (sub 0, a)
2437 if (isBitwiseNot(N0))
2438 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439 N0.getOperand(0));
2441 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442 if (N0.getOpcode() == ISD::ADD) {
2443 SDValue A, Xor;
2445 if (isBitwiseNot(N0.getOperand(0))) {
2446 A = N0.getOperand(1);
2447 Xor = N0.getOperand(0);
2448 } else if (isBitwiseNot(N0.getOperand(1))) {
2449 A = N0.getOperand(0);
2450 Xor = N0.getOperand(1);
2453 if (Xor)
2454 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2457 // Look for:
2458 // add (add x, y), 1
2459 // And if the target does not like this form then turn into:
2460 // sub y, (xor x, -1)
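// In two's complement (xor x, -1) is -x - 1, so sub y, (xor x, -1) computes
// y + x + 1, the same value as the original add-of-add-plus-one.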
2461 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2462 N0.getOpcode() == ISD::ADD) {
2463 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2464 DAG.getAllOnesConstant(DL, VT));
2465 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2469 // (x - y) + -1 -> add (xor y, -1), x
2470 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2471 isAllOnesOrAllOnesSplat(N1)) {
2472 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2473 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2476 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2477 return Combined;
2479 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2480 return Combined;
2482 return SDValue();
2485 SDValue DAGCombiner::visitADD(SDNode *N) {
2486 SDValue N0 = N->getOperand(0);
2487 SDValue N1 = N->getOperand(1);
2488 EVT VT = N0.getValueType();
2489 SDLoc DL(N);
2491 if (SDValue Combined = visitADDLike(N))
2492 return Combined;
2494 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2495 return V;
2497 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2498 return V;
2500 // fold (a+b) -> (a|b) iff a and b share no bits.
2501 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2502 DAG.haveNoCommonBitsSet(N0, N1))
2503 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2505 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2506 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2507 const APInt &C0 = N0->getConstantOperandAPInt(0);
2508 const APInt &C1 = N1->getConstantOperandAPInt(0);
2509 return DAG.getVScale(DL, VT, C0 + C1);
2512 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2513 if ((N0.getOpcode() == ISD::ADD) &&
2514 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2515 (N1.getOpcode() == ISD::VSCALE)) {
2516 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2517 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2518 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2519 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2522 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2523 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2524 N1.getOpcode() == ISD::STEP_VECTOR) {
2525 const APInt &C0 = N0->getConstantOperandAPInt(0);
2526 const APInt &C1 = N1->getConstantOperandAPInt(0);
2527 APInt NewStep = C0 + C1;
2528 return DAG.getStepVector(DL, VT, NewStep);
2531 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2532 if ((N0.getOpcode() == ISD::ADD) &&
2533 (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2534 (N1.getOpcode() == ISD::STEP_VECTOR)) {
2535 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2536 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2537 APInt NewStep = SV0 + SV1;
2538 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2539 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2542 return SDValue();
2545 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2546 unsigned Opcode = N->getOpcode();
2547 SDValue N0 = N->getOperand(0);
2548 SDValue N1 = N->getOperand(1);
2549 EVT VT = N0.getValueType();
2550 SDLoc DL(N);
2552 // fold (add_sat x, undef) -> -1
2553 if (N0.isUndef() || N1.isUndef())
2554 return DAG.getAllOnesConstant(DL, VT);
2556 // fold (add_sat c1, c2) -> c3
2557 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2558 return C;
2560 // canonicalize constant to RHS
2561 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2562 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2563 return DAG.getNode(Opcode, DL, VT, N1, N0);
2565 // fold vector ops
2566 if (VT.isVector()) {
2567 // TODO SimplifyVBinOp
2569 // fold (add_sat x, 0) -> x, vector edition
2570 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2571 return N0;
2574 // fold (add_sat x, 0) -> x
2575 if (isNullConstant(N1))
2576 return N0;
2578 // If it cannot overflow, transform into an add.
2579 if (Opcode == ISD::UADDSAT)
2580 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2581 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2583 return SDValue();
2586 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2587 bool Masked = false;
2589 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2590 while (true) {
2591 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2592 V = V.getOperand(0);
2593 continue;
2596 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2597 Masked = true;
2598 V = V.getOperand(0);
2599 continue;
2602 break;
2605 // If this is not a carry, return.
2606 if (V.getResNo() != 1)
2607 return SDValue();
2609 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2610 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2611 return SDValue();
2613 EVT VT = V.getNode()->getValueType(0);
2614 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2615 return SDValue();
2617 // If the result is masked, then no matter what kind of bool it is we can
2618 // return. If it isn't, then we need to make sure the bool type is either 0 or
2619 // 1 and not other values.
2620 if (Masked ||
2621 TLI.getBooleanContents(V.getValueType()) ==
2622 TargetLoweringBase::ZeroOrOneBooleanContent)
2623 return V;
2625 return SDValue();
2628 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2629 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2630 /// the opcode and bypass the mask operation.
2631 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2632 SelectionDAG &DAG, const SDLoc &DL) {
2633 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2634 return SDValue();
2636 EVT VT = N0.getValueType();
2637 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2638 return SDValue();
2640 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2641 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2642 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2645 /// Helper for doing combines based on N0 and N1 being added to each other.
2646 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2647 SDNode *LocReference) {
2648 EVT VT = N0.getValueType();
2649 SDLoc DL(LocReference);
2651 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2652 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2653 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2654 return DAG.getNode(ISD::SUB, DL, VT, N0,
2655 DAG.getNode(ISD::SHL, DL, VT,
2656 N1.getOperand(0).getOperand(1),
2657 N1.getOperand(1)));
2659 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2660 return V;
2662 // Look for:
2663 // add (add x, 1), y
2664 // And if the target does not like this form then turn into:
2665 // sub y, (xor x, -1)
2666 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2667 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2668 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2669 DAG.getAllOnesConstant(DL, VT));
2670 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2673 // Hoist one-use subtraction by non-opaque constant:
2674 // (x - C) + y -> (x + y) - C
2675 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2676 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2677 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2678 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2679 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2681 // Hoist one-use subtraction from non-opaque constant:
2682 // (C - x) + y -> (y - x) + C
2683 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2684 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2685 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2686 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2689 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2690 // rather than 'add 0/-1' (the zext should get folded).
2691 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2692 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2693 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2694 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2695 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2696 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2699 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2700 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2701 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2702 if (TN->getVT() == MVT::i1) {
2703 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2704 DAG.getConstant(1, DL, VT));
2705 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2709 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2710 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2711 N1.getResNo() == 0)
2712 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2713 N0, N1.getOperand(0), N1.getOperand(2));
2715 // (add X, Carry) -> (addcarry X, 0, Carry)
2716 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2717 if (SDValue Carry = getAsCarry(TLI, N1))
2718 return DAG.getNode(ISD::ADDCARRY, DL,
2719 DAG.getVTList(VT, Carry.getValueType()), N0,
2720 DAG.getConstant(0, DL, VT), Carry);
2722 return SDValue();
2725 SDValue DAGCombiner::visitADDC(SDNode *N) {
2726 SDValue N0 = N->getOperand(0);
2727 SDValue N1 = N->getOperand(1);
2728 EVT VT = N0.getValueType();
2729 SDLoc DL(N);
2731 // If the flag result is dead, turn this into an ADD.
2732 if (!N->hasAnyUseOfValue(1))
2733 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2734 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2736 // canonicalize constant to RHS.
2737 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2738 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2739 if (N0C && !N1C)
2740 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2742 // fold (addc x, 0) -> x + no carry out
2743 if (isNullConstant(N1))
2744 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2745 DL, MVT::Glue));
2747 // If it cannot overflow, transform into an add.
2748 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2749 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2750 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2752 return SDValue();
2756 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2757 * then the flip also occurs if computing the inverse is the same cost.
2758 * This function returns an empty SDValue in case it cannot flip the boolean
2759 * without increasing the cost of the computation. If you want to flip a boolean
2760 * no matter what, use DAG.getLogicalNOT.
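 * For example, under ZeroOrOneBooleanContent the value (xor b, 1) is already
 * the logical negation of b, so requesting a flip of it simply returns b
 * without creating any new nodes.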
2762 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2763 const TargetLowering &TLI,
2764 bool Force) {
2765 if (Force && isa<ConstantSDNode>(V))
2766 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2768 if (V.getOpcode() != ISD::XOR)
2769 return SDValue();
2771 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2772 if (!Const)
2773 return SDValue();
2775 EVT VT = V.getValueType();
2777 bool IsFlip = false;
2778 switch(TLI.getBooleanContents(VT)) {
2779 case TargetLowering::ZeroOrOneBooleanContent:
2780 IsFlip = Const->isOne();
2781 break;
2782 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2783 IsFlip = Const->isAllOnes();
2784 break;
2785 case TargetLowering::UndefinedBooleanContent:
2786 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2787 break;
2790 if (IsFlip)
2791 return V.getOperand(0);
2792 if (Force)
2793 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2794 return SDValue();
2797 SDValue DAGCombiner::visitADDO(SDNode *N) {
2798 SDValue N0 = N->getOperand(0);
2799 SDValue N1 = N->getOperand(1);
2800 EVT VT = N0.getValueType();
2801 bool IsSigned = (ISD::SADDO == N->getOpcode());
2803 EVT CarryVT = N->getValueType(1);
2804 SDLoc DL(N);
2806 // If the flag result is dead, turn this into an ADD.
2807 if (!N->hasAnyUseOfValue(1))
2808 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2809 DAG.getUNDEF(CarryVT));
2811 // canonicalize constant to RHS.
2812 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2813 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2814 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2816 // fold (addo x, 0) -> x + no carry out
2817 if (isNullOrNullSplat(N1))
2818 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2820 if (!IsSigned) {
2821 // If it cannot overflow, transform into an add.
2822 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2823 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2824 DAG.getConstant(0, DL, CarryVT));
2826 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2827 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2828 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2829 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2830 return CombineTo(
2831 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2834 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2835 return Combined;
2837 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2838 return Combined;
2841 return SDValue();
2844 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2845 EVT VT = N0.getValueType();
2846 if (VT.isVector())
2847 return SDValue();
2849 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2850 // If Y + 1 cannot overflow.
2851 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2852 SDValue Y = N1.getOperand(0);
2853 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2854 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2855 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2856 N1.getOperand(2));
2859 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2860 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2861 if (SDValue Carry = getAsCarry(TLI, N1))
2862 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2863 DAG.getConstant(0, SDLoc(N), VT), Carry);
2865 return SDValue();
2868 SDValue DAGCombiner::visitADDE(SDNode *N) {
2869 SDValue N0 = N->getOperand(0);
2870 SDValue N1 = N->getOperand(1);
2871 SDValue CarryIn = N->getOperand(2);
2873 // canonicalize constant to RHS
2874 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2875 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2876 if (N0C && !N1C)
2877 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2878 N1, N0, CarryIn);
2880 // fold (adde x, y, false) -> (addc x, y)
2881 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2882 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2884 return SDValue();
2887 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2888 SDValue N0 = N->getOperand(0);
2889 SDValue N1 = N->getOperand(1);
2890 SDValue CarryIn = N->getOperand(2);
2891 SDLoc DL(N);
2893 // canonicalize constant to RHS
2894 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2895 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2896 if (N0C && !N1C)
2897 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2899 // fold (addcarry x, y, false) -> (uaddo x, y)
2900 if (isNullConstant(CarryIn)) {
2901 if (!LegalOperations ||
2902 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2903 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2906 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2907 if (isNullConstant(N0) && isNullConstant(N1)) {
2908 EVT VT = N0.getValueType();
2909 EVT CarryVT = CarryIn.getValueType();
2910 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2911 AddToWorklist(CarryExt.getNode());
2912 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2913 DAG.getConstant(1, DL, VT)),
2914 DAG.getConstant(0, DL, CarryVT));
2917 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2918 return Combined;
2920 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2921 return Combined;
2923 return SDValue();
2926 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2927 SDValue N0 = N->getOperand(0);
2928 SDValue N1 = N->getOperand(1);
2929 SDValue CarryIn = N->getOperand(2);
2930 SDLoc DL(N);
2932 // canonicalize constant to RHS
2933 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2934 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2935 if (N0C && !N1C)
2936 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2938 // fold (saddo_carry x, y, false) -> (saddo x, y)
2939 if (isNullConstant(CarryIn)) {
2940 if (!LegalOperations ||
2941 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2942 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2945 return SDValue();
2949 * If we are facing some sort of diamond carry propagation pattern, try to
2950 * break it up to generate something like:
2951 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2953 * The end result is usually an increase in the number of operations required, but
2954 * because the carry is now linearized, other transforms can kick in and optimize the DAG.
2956 * Patterns typically look something like
2957 * (uaddo A, B)
2958 * / \
2959 * Carry Sum
2960 * | \
2961 * | (addcarry *, 0, Z)
2962 * | /
2963 * \ Carry
2964 * | /
2965 * (addcarry X, *, *)
2967 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2968 * produce a combine with a single path for carry propagation.
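 * A typical source of this shape is multi-limb arithmetic, where a uaddo's sum
 * feeds one addcarry while its carry and that addcarry's carry both feed a
 * further addcarry for the next limb.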
2970 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2971 SDValue X, SDValue Carry0, SDValue Carry1,
2972 SDNode *N) {
2973 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2974 return SDValue();
2975 if (Carry1.getOpcode() != ISD::UADDO)
2976 return SDValue();
2978 SDValue Z;
2981 * First look for a suitable Z. It will present itself in the form of
2982 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2984 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2985 isNullConstant(Carry0.getOperand(1))) {
2986 Z = Carry0.getOperand(2);
2987 } else if (Carry0.getOpcode() == ISD::UADDO &&
2988 isOneConstant(Carry0.getOperand(1))) {
2989 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2990 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2991 } else {
2992 // We couldn't find a suitable Z.
2993 return SDValue();
2997 auto cancelDiamond = [&](SDValue A,SDValue B) {
2998 SDLoc DL(N);
2999 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3000 Combiner.AddToWorklist(NewY.getNode());
3001 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3002 DAG.getConstant(0, DL, X.getValueType()),
3003 NewY.getValue(1));
3007 * (uaddo A, B)
3009 * Sum
3011 * (addcarry *, 0, Z)
3013 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3014 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3018 * (addcarry A, 0, Z)
3020 * Sum
3022 * (uaddo *, B)
3024 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3025 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3028 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3029 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3032 return SDValue();
3035 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3036 // match patterns like:
3038 // (uaddo A, B) CarryIn
3039 // | \ |
3040 // | \ |
3041 // PartialSum PartialCarryOutX /
3042 // | | /
3043 // | ____|____________/
3044 // | / |
3045 // (uaddo *, *) \________
3046 // | \ \
3047 // | \ |
3048 // | PartialCarryOutY |
3049 // | \ |
3050 // | \ /
3051 // AddCarrySum | ______/
3052 // | /
3053 // CarryOut = (or *, *)
3055 // And generate ADDCARRY (or SUBCARRY) with two result values:
3057 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3059 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3060 // a single path for carry/borrow out propagation:
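// A concrete instance is the upper limb of a wide unsigned add: one uaddo adds
// the two upper limbs, a second uaddo adds the zero-extended carry from the
// lower limb into that partial sum, and the two partial carry-outs are ORed.
// That whole cluster collapses into a single addcarry node.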
3061 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3062 const TargetLowering &TLI, SDValue Carry0,
3063 SDValue Carry1, SDNode *N) {
3064 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3065 return SDValue();
3066 unsigned Opcode = Carry0.getOpcode();
3067 if (Opcode != Carry1.getOpcode())
3068 return SDValue();
3069 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3070 return SDValue();
3072 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3073 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3074 // the above ASCII art.)
3075 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3076 Carry1.getOperand(1) != Carry0.getValue(0))
3077 std::swap(Carry0, Carry1);
3078 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3079 Carry1.getOperand(1) != Carry0.getValue(0))
3080 return SDValue();
3082 // The carry in value must be on the right-hand side for subtraction.
3083 unsigned CarryInOperandNum =
3084 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3085 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3086 return SDValue();
3087 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3089 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3090 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3091 return SDValue();
3093 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3094 // TODO: make getAsCarry() aware of how partial carries are merged.
3095 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3096 return SDValue();
3097 CarryIn = CarryIn.getOperand(0);
3098 if (CarryIn.getValueType() != MVT::i1)
3099 return SDValue();
3101 SDLoc DL(N);
3102 SDValue Merged =
3103 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3104 Carry0.getOperand(1), CarryIn);
3106 // Note that because we have proven that the result of the UADDO/USUBO
3107 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we
3108 // can also prove that if the first UADDO/USUBO overflows, the second
3109 // UADDO/USUBO cannot. For example, consider 8-bit numbers where 0xFF is the
3110 // maximum value.
3112 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3113 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3115 // This is important because it means that OR and XOR can be used to merge
3116 // carry flags; and that AND can return a constant zero.
3118 // TODO: match other operations that can merge flags (ADD, etc)
3119 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3120 if (N->getOpcode() == ISD::AND)
3121 return DAG.getConstant(0, DL, MVT::i1);
3122 return Merged.getValue(1);
3125 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3126 SDNode *N) {
3127 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3128 if (isBitwiseNot(N0))
3129 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3130 SDLoc DL(N);
3131 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3132 N0.getOperand(0), NotC);
3133 return CombineTo(
3134 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3137 // Iff the flag result is dead:
3138 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3139 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3140 // or the dependency between the instructions.
3141 if ((N0.getOpcode() == ISD::ADD ||
3142 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3143 N0.getValue(1) != CarryIn)) &&
3144 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3145 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3146 N0.getOperand(0), N0.getOperand(1), CarryIn);
3149 * When one of the addcarry arguments is itself a carry, we may be facing
3150 * a diamond carry propagation, in which case we try to transform the DAG
3151 * to ensure linear carry propagation if that is possible.
3153 if (auto Y = getAsCarry(TLI, N1)) {
3154 // Because both are carries, Y and Z can be swapped.
3155 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3156 return R;
3157 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3158 return R;
3161 return SDValue();
3164 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3165 // clamp/truncation if necessary.
3166 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3167 SDValue RHS, SelectionDAG &DAG,
3168 const SDLoc &DL) {
3169 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3170 "Illegal truncation");
3172 if (DstVT == SrcVT)
3173 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3175 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3176 // clamping RHS.
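// For example, with SrcVT == i32, DstVT == i16 and LHS known to fit in 16
// bits, clamping RHS to 0xFFFF and truncating both operands gives an i16
// USUBSAT with the same value as the i32 one.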
3177 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3178 DstVT.getScalarSizeInBits());
3179 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3180 return SDValue();
3182 SDValue SatLimit =
3183 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3184 DstVT.getScalarSizeInBits()),
3185 DL, SrcVT);
3186 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3187 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3188 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3189 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3192 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3193 // usubsat(a,b), optionally as a truncated type.
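// umax(a,b) - b is a - b when a >= b and 0 otherwise, which is exactly
// usubsat(a,b); a - umin(a,b) behaves the same way.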
3194 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3195 if (N->getOpcode() != ISD::SUB ||
3196 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3197 return SDValue();
3199 EVT SubVT = N->getValueType(0);
3200 SDValue Op0 = N->getOperand(0);
3201 SDValue Op1 = N->getOperand(1);
3203 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3204 // that may be converted to usubsat(a,b).
3205 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3206 SDValue MaxLHS = Op0.getOperand(0);
3207 SDValue MaxRHS = Op0.getOperand(1);
3208 if (MaxLHS == Op1)
3209 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3210 if (MaxRHS == Op1)
3211 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3214 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3215 SDValue MinLHS = Op1.getOperand(0);
3216 SDValue MinRHS = Op1.getOperand(1);
3217 if (MinLHS == Op0)
3218 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3219 if (MinRHS == Op0)
3220 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3223 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3224 if (Op1.getOpcode() == ISD::TRUNCATE &&
3225 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3226 Op1.getOperand(0).hasOneUse()) {
3227 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3228 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3229 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3230 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3231 DAG, SDLoc(N));
3232 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3233 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3234 DAG, SDLoc(N));
3237 return SDValue();
3240 // Since it may not be valid to emit a fold to zero for vector initializers,
3241 // check whether we can before folding.
3242 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3243 SelectionDAG &DAG, bool LegalOperations) {
3244 if (!VT.isVector())
3245 return DAG.getConstant(0, DL, VT);
3246 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3247 return DAG.getConstant(0, DL, VT);
3248 return SDValue();
3251 SDValue DAGCombiner::visitSUB(SDNode *N) {
3252 SDValue N0 = N->getOperand(0);
3253 SDValue N1 = N->getOperand(1);
3254 EVT VT = N0.getValueType();
3255 SDLoc DL(N);
3257 // fold (sub x, x) -> 0
3258 // FIXME: Refactor this and xor and other similar operations together.
3259 if (N0 == N1)
3260 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3262 // fold (sub c1, c2) -> c3
3263 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3264 return C;
3266 // fold vector ops
3267 if (VT.isVector()) {
3268 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3269 return FoldedVOp;
3271 // fold (sub x, 0) -> x, vector edition
3272 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3273 return N0;
3276 if (SDValue NewSel = foldBinOpIntoSelect(N))
3277 return NewSel;
3279 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3281 // fold (sub x, c) -> (add x, -c)
3282 if (N1C) {
3283 return DAG.getNode(ISD::ADD, DL, VT, N0,
3284 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3287 if (isNullOrNullSplat(N0)) {
3288 unsigned BitWidth = VT.getScalarSizeInBits();
3289 // Right-shifting everything out but the sign bit followed by negation is
3290 // the same as flipping arithmetic/logical shift type without the negation:
3291 // -(X >>u 31) -> (X >>s 31)
3292 // -(X >>s 31) -> (X >>u 31)
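// E.g. for i32: if X is negative, X >>u 31 == 1 and 0 - 1 == -1 == X >>s 31;
// if X is non-negative, both shifts produce 0.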
3293 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3294 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3295 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3296 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3297 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3298 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3302 // 0 - X --> 0 if the sub is NUW.
3303 if (N->getFlags().hasNoUnsignedWrap())
3304 return N0;
3306 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3307 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3308 // N1 must be 0 because negating the minimum signed value is undefined.
3309 if (N->getFlags().hasNoSignedWrap())
3310 return N0;
3312 // 0 - X --> X if X is 0 or the minimum signed value.
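// E.g. for i8: 0 - 0 == 0 and 0 - (-128) wraps back to -128, so in both
// cases the result equals X.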
3313 return N1;
3316 // Convert 0 - abs(x).
3317 if (N1->getOpcode() == ISD::ABS &&
3318 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3319 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3320 return Result;
3322 // Fold neg(splat(neg(x))) -> splat(x)
3323 if (VT.isVector()) {
3324 SDValue N1S = DAG.getSplatValue(N1, true);
3325 if (N1S && N1S.getOpcode() == ISD::SUB &&
3326 isNullConstant(N1S.getOperand(0))) {
3327 if (VT.isScalableVector())
3328 return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3329 return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3334 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3335 if (isAllOnesOrAllOnesSplat(N0))
3336 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3338 // fold (A - (0-B)) -> A+B
3339 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3340 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3342 // fold A-(A-B) -> B
3343 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3344 return N1.getOperand(1);
3346 // fold (A+B)-A -> B
3347 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3348 return N0.getOperand(1);
3350 // fold (A+B)-B -> A
3351 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3352 return N0.getOperand(0);
3354 // fold (A+C1)-C2 -> A+(C1-C2)
3355 if (N0.getOpcode() == ISD::ADD &&
3356 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3357 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3358 SDValue NewC =
3359 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3360 assert(NewC && "Constant folding failed");
3361 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3364 // fold C2-(A+C1) -> (C2-C1)-A
3365 if (N1.getOpcode() == ISD::ADD) {
3366 SDValue N11 = N1.getOperand(1);
3367 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3368 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3369 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3370 assert(NewC && "Constant folding failed");
3371 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3375 // fold (A-C1)-C2 -> A-(C1+C2)
3376 if (N0.getOpcode() == ISD::SUB &&
3377 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3378 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3379 SDValue NewC =
3380 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3381 assert(NewC && "Constant folding failed");
3382 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3385 // fold (c1-A)-c2 -> (c1-c2)-A
3386 if (N0.getOpcode() == ISD::SUB &&
3387 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3388 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3389 SDValue NewC =
3390 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3391 assert(NewC && "Constant folding failed");
3392 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3395 // fold ((A+(B+or-C))-B) -> A+or-C
3396 if (N0.getOpcode() == ISD::ADD &&
3397 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3398 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3399 N0.getOperand(1).getOperand(0) == N1)
3400 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3401 N0.getOperand(1).getOperand(1));
3403 // fold ((A+(C+B))-B) -> A+C
3404 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3405 N0.getOperand(1).getOperand(1) == N1)
3406 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3407 N0.getOperand(1).getOperand(0));
3409 // fold ((A-(B-C))-C) -> A-B
3410 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3411 N0.getOperand(1).getOperand(1) == N1)
3412 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3413 N0.getOperand(1).getOperand(0));
3415 // fold (A-(B-C)) -> A+(C-B)
3416 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3417 return DAG.getNode(ISD::ADD, DL, VT, N0,
3418 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3419 N1.getOperand(0)));
3421 // A - (A & B) -> A & (~B)
3422 if (N1.getOpcode() == ISD::AND) {
3423 SDValue A = N1.getOperand(0);
3424 SDValue B = N1.getOperand(1);
3425 if (A != N0)
3426 std::swap(A, B);
3427 if (A == N0 &&
3428 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3429 SDValue InvB =
3430 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3431 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3435 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3436 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3437 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3438 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3439 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3440 N1.getOperand(0).getOperand(1),
3441 N1.getOperand(1));
3442 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3444 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3445 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3446 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3447 N1.getOperand(0),
3448 N1.getOperand(1).getOperand(1));
3449 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3453 // If either operand of a sub is undef, the result is undef
3454 if (N0.isUndef())
3455 return N0;
3456 if (N1.isUndef())
3457 return N1;
3459 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3460 return V;
3462 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3463 return V;
3465 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3466 return V;
3468 if (SDValue V = foldSubToUSubSat(VT, N))
3469 return V;
3471 // (x - y) - 1 -> add (xor y, -1), x
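// (xor y, -1) == -y - 1, so adding x yields x - y - 1.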
3472 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3473 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3474 DAG.getAllOnesConstant(DL, VT));
3475 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3478 // Look for:
3479 // sub y, (xor x, -1)
3480 // And if the target does not like this form then turn into:
3481 // add (add x, y), 1
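// Since (xor x, -1) == -x - 1, y - (xor x, -1) == x + y + 1.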
3482 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3483 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3484 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3487 // Hoist one-use addition by non-opaque constant:
3488 // (x + C) - y -> (x - y) + C
3489 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3490 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3491 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3492 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3494 // y - (x + C) -> (y - x) - C
3495 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3496 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3497 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3498 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3500 // (x - C) - y -> (x - y) - C
3501 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3502 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3503 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3504 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3505 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3507 // (C - x) - y -> C - (x + y)
3508 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3509 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3510 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3511 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3514 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3515 // rather than 'sub 0/1' (the sext should get folded).
3516 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
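// zext i1 Y is 0 or 1 while sext i1 Y is 0 or -1, so X - zext(Y) and
// X + sext(Y) compute the same value.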
3517 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3518 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3519 TLI.getBooleanContents(VT) ==
3520 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3521 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3522 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3525 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3526 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3527 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3528 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3529 SDValue S0 = N1.getOperand(0);
3530 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3531 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3532 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3533 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3537 // If the relocation model supports it, consider symbol offsets.
3538 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3539 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3540 // fold (sub Sym, c) -> Sym-c
3541 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3542 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3543 GA->getOffset() -
3544 (uint64_t)N1C->getSExtValue());
3545 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3546 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3547 if (GA->getGlobal() == GB->getGlobal())
3548 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3549 DL, VT);
3552 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
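// The sign-extended i1 is 0 or -1 depending on bit 0 of Y, so subtracting it
// is the same as adding (and Y, 1).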
3553 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3554 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3555 if (TN->getVT() == MVT::i1) {
3556 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3557 DAG.getConstant(1, DL, VT));
3558 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3562 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3563 if (N1.getOpcode() == ISD::VSCALE) {
3564 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3565 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3568 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3569 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3570 APInt NewStep = -N1.getConstantOperandAPInt(0);
3571 return DAG.getNode(ISD::ADD, DL, VT, N0,
3572 DAG.getStepVector(DL, VT, NewStep));
3575 // Prefer an add for more folding potential and possibly better codegen:
3576 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
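// Both shifts isolate the sign bit: lshr yields 0 or 1, ashr yields 0 or -1,
// so subtracting the former is the same as adding the latter.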
3577 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3578 SDValue ShAmt = N1.getOperand(1);
3579 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3580 if (ShAmtC &&
3581 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3582 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3583 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3587 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3588 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3589 if (SDValue Carry = getAsCarry(TLI, N0)) {
3590 SDValue X = N1;
3591 SDValue Zero = DAG.getConstant(0, DL, VT);
3592 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3593 return DAG.getNode(ISD::ADDCARRY, DL,
3594 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3595 Carry);
3599 return SDValue();
3602 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3603 SDValue N0 = N->getOperand(0);
3604 SDValue N1 = N->getOperand(1);
3605 EVT VT = N0.getValueType();
3606 SDLoc DL(N);
3608 // fold (sub_sat x, undef) -> 0
3609 if (N0.isUndef() || N1.isUndef())
3610 return DAG.getConstant(0, DL, VT);
3612 // fold (sub_sat x, x) -> 0
3613 if (N0 == N1)
3614 return DAG.getConstant(0, DL, VT);
3616 // fold (sub_sat c1, c2) -> c3
3617 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3618 return C;
3620 // fold vector ops
3621 if (VT.isVector()) {
3622 // TODO SimplifyVBinOp
3624 // fold (sub_sat x, 0) -> x, vector edition
3625 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3626 return N0;
3629 // fold (sub_sat x, 0) -> x
3630 if (isNullConstant(N1))
3631 return N0;
3633 return SDValue();
3636 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3637 SDValue N0 = N->getOperand(0);
3638 SDValue N1 = N->getOperand(1);
3639 EVT VT = N0.getValueType();
3640 SDLoc DL(N);
3642 // If the flag result is dead, turn this into an SUB.
3643 if (!N->hasAnyUseOfValue(1))
3644 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3645 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3647 // fold (subc x, x) -> 0 + no borrow
3648 if (N0 == N1)
3649 return CombineTo(N, DAG.getConstant(0, DL, VT),
3650 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3652 // fold (subc x, 0) -> x + no borrow
3653 if (isNullConstant(N1))
3654 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3656 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3657 if (isAllOnesConstant(N0))
3658 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3659 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3661 return SDValue();
3664 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3665 SDValue N0 = N->getOperand(0);
3666 SDValue N1 = N->getOperand(1);
3667 EVT VT = N0.getValueType();
3668 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3670 EVT CarryVT = N->getValueType(1);
3671 SDLoc DL(N);
3673 // If the flag result is dead, turn this into an SUB.
3674 if (!N->hasAnyUseOfValue(1))
3675 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3676 DAG.getUNDEF(CarryVT));
3678 // fold (subo x, x) -> 0 + no borrow
3679 if (N0 == N1)
3680 return CombineTo(N, DAG.getConstant(0, DL, VT),
3681 DAG.getConstant(0, DL, CarryVT));
3683 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3685 // fold (subo x, c) -> (addo x, -c)
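// Negating the minimum signed value overflows (e.g. -(-128) is not
// representable in i8), so that case is excluded below.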
3686 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3687 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3688 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3691 // fold (subo x, 0) -> x + no borrow
3692 if (isNullOrNullSplat(N1))
3693 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3695 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3696 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3697 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3698 DAG.getConstant(0, DL, CarryVT));
3700 return SDValue();
3703 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3704 SDValue N0 = N->getOperand(0);
3705 SDValue N1 = N->getOperand(1);
3706 SDValue CarryIn = N->getOperand(2);
3708 // fold (sube x, y, false) -> (subc x, y)
3709 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3710 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3712 return SDValue();
3715 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3716 SDValue N0 = N->getOperand(0);
3717 SDValue N1 = N->getOperand(1);
3718 SDValue CarryIn = N->getOperand(2);
3720 // fold (subcarry x, y, false) -> (usubo x, y)
3721 if (isNullConstant(CarryIn)) {
3722 if (!LegalOperations ||
3723 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3724 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3727 return SDValue();
3730 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3731 SDValue N0 = N->getOperand(0);
3732 SDValue N1 = N->getOperand(1);
3733 SDValue CarryIn = N->getOperand(2);
3735 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3736 if (isNullConstant(CarryIn)) {
3737 if (!LegalOperations ||
3738 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3739 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3742 return SDValue();
3745 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3746 // UMULFIXSAT here.
3747 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3748 SDValue N0 = N->getOperand(0);
3749 SDValue N1 = N->getOperand(1);
3750 SDValue Scale = N->getOperand(2);
3751 EVT VT = N0.getValueType();
3753 // fold (mulfix x, undef, scale) -> 0
3754 if (N0.isUndef() || N1.isUndef())
3755 return DAG.getConstant(0, SDLoc(N), VT);
3757 // Canonicalize constant to RHS (vector doesn't have to splat)
3758 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3759 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3760 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3762 // fold (mulfix x, 0, scale) -> 0
3763 if (isNullConstant(N1))
3764 return DAG.getConstant(0, SDLoc(N), VT);
3766 return SDValue();
3769 SDValue DAGCombiner::visitMUL(SDNode *N) {
3770 SDValue N0 = N->getOperand(0);
3771 SDValue N1 = N->getOperand(1);
3772 EVT VT = N0.getValueType();
3774 // fold (mul x, undef) -> 0
3775 if (N0.isUndef() || N1.isUndef())
3776 return DAG.getConstant(0, SDLoc(N), VT);
3778 // fold (mul c1, c2) -> c1*c2
3779 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3780 return C;
3782 // canonicalize constant to RHS (vector doesn't have to splat)
3783 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3784 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3785 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3787 bool N1IsConst = false;
3788 bool N1IsOpaqueConst = false;
3789 APInt ConstValue1;
3791 // fold vector ops
3792 if (VT.isVector()) {
3793 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
3794 return FoldedVOp;
3796 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3797 assert((!N1IsConst ||
3798 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3799 "Splat APInt should be element width");
3800 } else {
3801 N1IsConst = isa<ConstantSDNode>(N1);
3802 if (N1IsConst) {
3803 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3804 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3808 // fold (mul x, 0) -> 0
3809 if (N1IsConst && ConstValue1.isZero())
3810 return N1;
3812 // fold (mul x, 1) -> x
3813 if (N1IsConst && ConstValue1.isOne())
3814 return N0;
3816 if (SDValue NewSel = foldBinOpIntoSelect(N))
3817 return NewSel;
3819 // fold (mul x, -1) -> 0-x
3820 if (N1IsConst && ConstValue1.isAllOnes()) {
3821 SDLoc DL(N);
3822 return DAG.getNode(ISD::SUB, DL, VT,
3823 DAG.getConstant(0, DL, VT), N0);
3826 // fold (mul x, (1 << c)) -> x << c
3827 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3828 DAG.isKnownToBeAPowerOfTwo(N1) &&
3829 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3830 SDLoc DL(N);
3831 SDValue LogBase2 = BuildLogBase2(N1, DL);
3832 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3833 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3834 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3837 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3838 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
3839 unsigned Log2Val = (-ConstValue1).logBase2();
3840 SDLoc DL(N);
3841 // FIXME: If the input is something that is easily negated (e.g. a
3842 // single-use add), we should put the negate there.
3843 return DAG.getNode(ISD::SUB, DL, VT,
3844 DAG.getConstant(0, DL, VT),
3845 DAG.getNode(ISD::SHL, DL, VT, N0,
3846 DAG.getConstant(Log2Val, DL,
3847 getShiftAmountTy(N0.getValueType()))));
3850 // Try to transform:
3851 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3852 // mul x, (2^N + 1) --> add (shl x, N), x
3853 // mul x, (2^N - 1) --> sub (shl x, N), x
3854 // Examples: x * 33 --> (x << 5) + x
3855 // x * 15 --> (x << 4) - x
3856 // x * -33 --> -((x << 5) + x)
3857 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3858 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3859 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3860 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3861 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3862 // x * 0xf800 --> (x << 16) - (x << 11)
3863 // x * -0x8800 --> -((x << 15) + (x << 11))
3864 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3865 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3866 // TODO: We could handle more general decomposition of any constant by
3867 // having the target set a limit on number of ops and making a
3868 // callback to determine that sequence (similar to sqrt expansion).
3869 unsigned MathOp = ISD::DELETED_NODE;
3870 APInt MulC = ConstValue1.abs();
3871 // The constant `2` should be treated as (2^0 + 1).
3872 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3873 MulC.lshrInPlace(TZeros);
3874 if ((MulC - 1).isPowerOf2())
3875 MathOp = ISD::ADD;
3876 else if ((MulC + 1).isPowerOf2())
3877 MathOp = ISD::SUB;
3879 if (MathOp != ISD::DELETED_NODE) {
3880 unsigned ShAmt =
3881 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3882 ShAmt += TZeros;
3883 assert(ShAmt < VT.getScalarSizeInBits() &&
3884 "multiply-by-constant generated out of bounds shift");
3885 SDLoc DL(N);
3886 SDValue Shl =
3887 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3888 SDValue R =
3889 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3890 DAG.getNode(ISD::SHL, DL, VT, N0,
3891 DAG.getConstant(TZeros, DL, VT)))
3892 : DAG.getNode(MathOp, DL, VT, Shl, N0);
3893 if (ConstValue1.isNegative())
3894 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3895 return R;
3899 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3900 if (N0.getOpcode() == ISD::SHL &&
3901 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3902 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3903 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3904 if (isConstantOrConstantVector(C3))
3905 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3908 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3909 // use.
3911 SDValue Sh, Y;
3913 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3914 if (N0.getOpcode() == ISD::SHL &&
3915 isConstantOrConstantVector(N0.getOperand(1)) &&
3916 N0.getNode()->hasOneUse()) {
3917 Sh = N0; Y = N1;
3918 } else if (N1.getOpcode() == ISD::SHL &&
3919 isConstantOrConstantVector(N1.getOperand(1)) &&
3920 N1.getNode()->hasOneUse()) {
3921 Sh = N1; Y = N0;
3924 if (Sh.getNode()) {
3925 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3926 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3930 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3931 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3932 N0.getOpcode() == ISD::ADD &&
3933 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3934 isMulAddWithConstProfitable(N, N0, N1))
3935 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3936 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3937 N0.getOperand(0), N1),
3938 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3939 N0.getOperand(1), N1));
3941 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3942 if (N0.getOpcode() == ISD::VSCALE)
3943 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3944 const APInt &C0 = N0.getConstantOperandAPInt(0);
3945 const APInt &C1 = NC1->getAPIntValue();
3946 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3949 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3950 APInt MulVal;
3951 if (N0.getOpcode() == ISD::STEP_VECTOR)
3952 if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3953 const APInt &C0 = N0.getConstantOperandAPInt(0);
3954 APInt NewStep = C0 * MulVal;
3955 return DAG.getStepVector(SDLoc(N), VT, NewStep);
3958 // Fold, per vector element,
3959 // (mul x, 0/undef) -> 0 and (mul x, 1) -> x
3960 // into and(x, mask).
3961 // We can replace vectors with '0' and '1' factors with a clearing mask.
3962 if (VT.isFixedLengthVector()) {
3963 unsigned NumElts = VT.getVectorNumElements();
3964 SmallBitVector ClearMask;
3965 ClearMask.reserve(NumElts);
3966 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3967 if (!V || V->isZero()) {
3968 ClearMask.push_back(true);
3969 return true;
3971 ClearMask.push_back(false);
3972 return V->isOne();
3974 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3975 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3976 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3977 SDLoc DL(N);
3978 EVT LegalSVT = N1.getOperand(0).getValueType();
3979 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3980 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3981 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3982 for (unsigned I = 0; I != NumElts; ++I)
3983 if (ClearMask[I])
3984 Mask[I] = Zero;
3985 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3989 // reassociate mul
3990 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3991 return RMUL;
3993 return SDValue();
3996 /// Return true if divmod libcall is available.
3997 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3998 const TargetLowering &TLI) {
3999 RTLIB::Libcall LC;
4000 EVT NodeType = Node->getValueType(0);
4001 if (!NodeType.isSimple())
4002 return false;
4003 switch (NodeType.getSimpleVT().SimpleTy) {
4004 default: return false; // No libcall for vector types.
4005 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4006 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4007 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4008 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4009 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4012 return TLI.getLibcallName(LC) != nullptr;
4015 /// Issue divrem if both quotient and remainder are needed.
4016 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4017 if (Node->use_empty())
4018 return SDValue(); // This is a dead node, leave it alone.
4020 unsigned Opcode = Node->getOpcode();
4021 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4022 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4024 // DivMod libcalls can still work on non-legal types.
4025 EVT VT = Node->getValueType(0);
4026 if (VT.isVector() || !VT.isInteger())
4027 return SDValue();
4029 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4030 return SDValue();
4032 // If DIVREM is going to get expanded into a libcall,
4033 // but there is no libcall available, then don't combine.
4034 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4035 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4036 return SDValue();
4038 // If div is legal, it's better to do the normal expansion
4039 unsigned OtherOpcode = 0;
4040 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4041 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4042 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4043 return SDValue();
4044 } else {
4045 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4046 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4047 return SDValue();
4050 SDValue Op0 = Node->getOperand(0);
4051 SDValue Op1 = Node->getOperand(1);
4052 SDValue combined;
4053 for (SDNode *User : Op0.getNode()->uses()) {
4054 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4055 User->use_empty())
4056 continue;
4057 // Convert the other matching node(s), too;
4058 // otherwise, the DIVREM may get target-legalized into something
4059 // target-specific that we won't be able to recognize.
4060 unsigned UserOpc = User->getOpcode();
4061 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4062 User->getOperand(0) == Op0 &&
4063 User->getOperand(1) == Op1) {
4064 if (!combined) {
4065 if (UserOpc == OtherOpcode) {
4066 SDVTList VTs = DAG.getVTList(VT, VT);
4067 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4068 } else if (UserOpc == DivRemOpc) {
4069 combined = SDValue(User, 0);
4070 } else {
4071 assert(UserOpc == Opcode);
4072 continue;
4075 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4076 CombineTo(User, combined);
4077 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4078 CombineTo(User, combined.getValue(1));
4081 return combined;
4084 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4085 SDValue N0 = N->getOperand(0);
4086 SDValue N1 = N->getOperand(1);
4087 EVT VT = N->getValueType(0);
4088 SDLoc DL(N);
4090 unsigned Opc = N->getOpcode();
4091 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4092 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4094 // X / undef -> undef
4095 // X % undef -> undef
4096 // X / 0 -> undef
4097 // X % 0 -> undef
4098 // NOTE: This includes vectors where any divisor element is zero/undef.
4099 if (DAG.isUndef(Opc, {N0, N1}))
4100 return DAG.getUNDEF(VT);
4102 // undef / X -> 0
4103 // undef % X -> 0
4104 if (N0.isUndef())
4105 return DAG.getConstant(0, DL, VT);
4107 // 0 / X -> 0
4108 // 0 % X -> 0
4109 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4110 if (N0C && N0C->isZero())
4111 return N0;
4113 // X / X -> 1
4114 // X % X -> 0
4115 if (N0 == N1)
4116 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4118 // X / 1 -> X
4119 // X % 1 -> 0
4120 // If this is a boolean op (single-bit element type), we can't have
4121 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4122 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4123 // it's a 1.
4124 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4125 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4127 return SDValue();
4130 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4131 SDValue N0 = N->getOperand(0);
4132 SDValue N1 = N->getOperand(1);
4133 EVT VT = N->getValueType(0);
4134 EVT CCVT = getSetCCResultType(VT);
4135 SDLoc DL(N);
4137 // fold (sdiv c1, c2) -> c1/c2
4138 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4139 return C;
4141 // fold vector ops
4142 if (VT.isVector())
4143 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4144 return FoldedVOp;
4146 // fold (sdiv X, -1) -> 0-X
4147 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4148 if (N1C && N1C->isAllOnes())
4149 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4151 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4152 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4153 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4154 DAG.getConstant(1, DL, VT),
4155 DAG.getConstant(0, DL, VT));
4157 if (SDValue V = simplifyDivRem(N, DAG))
4158 return V;
4160 if (SDValue NewSel = foldBinOpIntoSelect(N))
4161 return NewSel;
4163 // If we know the sign bits of both operands are zero, strength reduce to a
4164 // udiv instead. Handles (X&15) /s 4 -> (X&15) >> 2
4165 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4166 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4168 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4169 // If the corresponding remainder node exists, update its users with
4170 // (Dividend - (Quotient * Divisor).
4171 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4172 { N0, N1 })) {
4173 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4174 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4175 AddToWorklist(Mul.getNode());
4176 AddToWorklist(Sub.getNode());
4177 CombineTo(RemNode, Sub);
4179 return V;
4182 // sdiv, srem -> sdivrem
4183 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4184 // true. Otherwise, we break the simplification logic in visitREM().
4185 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4186 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4187 if (SDValue DivRem = useDivRem(N))
4188 return DivRem;
4190 return SDValue();
4193 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4194 SDLoc DL(N);
4195 EVT VT = N->getValueType(0);
4196 EVT CCVT = getSetCCResultType(VT);
4197 unsigned BitWidth = VT.getScalarSizeInBits();
4199 // Helper for determining whether a value is a power-of-2 constant scalar or
4200 // a vector of such elements.
4201 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4202 if (C->isZero() || C->isOpaque())
4203 return false;
4204 if (C->getAPIntValue().isPowerOf2())
4205 return true;
4206 if (C->getAPIntValue().isNegatedPowerOf2())
4207 return true;
4208 return false;
4211 // fold (sdiv X, pow2) -> simple ops after legalize
4212 // FIXME: We check for the exact bit here because the generic lowering gives
4213 // better results in that case. The target-specific lowering should learn how
4214 // to handle exact sdivs efficiently.
4215 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4216 // Target-specific implementation of sdiv x, pow2.
4217 if (SDValue Res = BuildSDIVPow2(N))
4218 return Res;
4220 // Create constants that are functions of the shift amount value.
4221 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4222 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4223 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4224 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4225 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4226 if (!isConstantOrConstantVector(Inexact))
4227 return SDValue();
4229 // Splat the sign bit into the register
4230 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4231 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4232 AddToWorklist(Sign.getNode());
4234 // Add (N0 < 0) ? abs2 - 1 : 0;
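// E.g. for (sdiv X, 4) on i32: Sign is 0 or -1, Srl is then 0 or 3, and
// (X + bias) >>s 2 rounds the quotient toward zero for negative X.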
4235 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4236 AddToWorklist(Srl.getNode());
4237 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4238 AddToWorklist(Add.getNode());
4239 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4240 AddToWorklist(Sra.getNode());
4242 // Special case: (sdiv X, 1) -> X
4243 // Special case: (sdiv X, -1) -> 0-X
4244 SDValue One = DAG.getConstant(1, DL, VT);
4245 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4246 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4247 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4248 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4249 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4251 // If dividing by a positive value, we're done. Otherwise, the result must
4252 // be negated.
4253 SDValue Zero = DAG.getConstant(0, DL, VT);
4254 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4256 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4257 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4258 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4259 return Res;
4262 // If integer divide is expensive and we satisfy the requirements, emit an
4263 // alternate sequence. Targets may check function attributes for size/speed
4264 // trade-offs.
4265 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4266 if (isConstantOrConstantVector(N1) &&
4267 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4268 if (SDValue Op = BuildSDIV(N))
4269 return Op;
4271 return SDValue();
4274 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4275 SDValue N0 = N->getOperand(0);
4276 SDValue N1 = N->getOperand(1);
4277 EVT VT = N->getValueType(0);
4278 EVT CCVT = getSetCCResultType(VT);
4279 SDLoc DL(N);
4281 // fold (udiv c1, c2) -> c1/c2
4282 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4283 return C;
4285 // fold vector ops
4286 if (VT.isVector())
4287 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4288 return FoldedVOp;
4290 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4291 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4292 if (N1C && N1C->isAllOnes())
4293 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4294 DAG.getConstant(1, DL, VT),
4295 DAG.getConstant(0, DL, VT));
4297 if (SDValue V = simplifyDivRem(N, DAG))
4298 return V;
4300 if (SDValue NewSel = foldBinOpIntoSelect(N))
4301 return NewSel;
4303 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4304 // If the corresponding remainder node exists, update its users with
4305 // (Dividend - (Quotient * Divisor).
4306 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4307 { N0, N1 })) {
4308 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4309 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4310 AddToWorklist(Mul.getNode());
4311 AddToWorklist(Sub.getNode());
4312 CombineTo(RemNode, Sub);
4314 return V;
4317 // udiv, urem -> udivrem
4318 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4319 // true. Otherwise, we break the simplification logic in visitREM().
4320 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4321 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4322 if (SDValue DivRem = useDivRem(N))
4323 return DivRem;
4325 return SDValue();
4328 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4329 SDLoc DL(N);
4330 EVT VT = N->getValueType(0);
4332 // fold (udiv x, (1 << c)) -> x >>u c
4333 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4334 DAG.isKnownToBeAPowerOfTwo(N1)) {
4335 SDValue LogBase2 = BuildLogBase2(N1, DL);
4336 AddToWorklist(LogBase2.getNode());
4338 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4339 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4340 AddToWorklist(Trunc.getNode());
4341 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4344 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
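// E.g. x udiv (8 << y) == x >>u (3 + y), since 8 << y == 1 << (3 + y).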
4345 if (N1.getOpcode() == ISD::SHL) {
4346 SDValue N10 = N1.getOperand(0);
4347 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4348 DAG.isKnownToBeAPowerOfTwo(N10)) {
4349 SDValue LogBase2 = BuildLogBase2(N10, DL);
4350 AddToWorklist(LogBase2.getNode());
4352 EVT ADDVT = N1.getOperand(1).getValueType();
4353 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4354 AddToWorklist(Trunc.getNode());
4355 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4356 AddToWorklist(Add.getNode());
4357 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4361 // fold (udiv x, c) -> alternate
4362 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4363 if (isConstantOrConstantVector(N1) &&
4364 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4365 if (SDValue Op = BuildUDIV(N))
4366 return Op;
4368 return SDValue();
4371 // handles ISD::SREM and ISD::UREM
4372 SDValue DAGCombiner::visitREM(SDNode *N) {
4373 unsigned Opcode = N->getOpcode();
4374 SDValue N0 = N->getOperand(0);
4375 SDValue N1 = N->getOperand(1);
4376 EVT VT = N->getValueType(0);
4377 EVT CCVT = getSetCCResultType(VT);
4379 bool isSigned = (Opcode == ISD::SREM);
4380 SDLoc DL(N);
4382 // fold (rem c1, c2) -> c1%c2
4383 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4384 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4385 return C;
4387 // fold (urem X, -1) -> select(X == -1, 0, x)
4388 if (!isSigned && N1C && N1C->isAllOnes())
4389 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4390 DAG.getConstant(0, DL, VT), N0);
4392 if (SDValue V = simplifyDivRem(N, DAG))
4393 return V;
4395 if (SDValue NewSel = foldBinOpIntoSelect(N))
4396 return NewSel;
4398 if (isSigned) {
4399 // If we know the sign bits of both operands are zero, strength reduce to a
4400 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4401 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4402 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4403 } else {
4404 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4405 // fold (urem x, pow2) -> (and x, pow2-1)
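// E.g. x urem 16 == x & 15.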
4406 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4407 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4408 AddToWorklist(Add.getNode());
4409 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4411 if (N1.getOpcode() == ISD::SHL &&
4412 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4413 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4414 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4415 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4416 AddToWorklist(Add.getNode());
4417 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4421 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4423 // If X/C can be simplified by the division-by-constant logic, lower
4424 // X%C to the equivalent of X-X/C*C.
4425 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4426 // speculative DIV must not cause a DIVREM conversion. We guard against this
4427 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4428 // combine will not return a DIVREM. Regardless, checking cheapness here
4429 // makes sense since the simplification results in fatter code.
4430 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4431 SDValue OptimizedDiv =
4432 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4433 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4434 // If the equivalent Div node also exists, update its users.
4435 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4436 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4437 { N0, N1 }))
4438 CombineTo(DivNode, OptimizedDiv);
4439 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4440 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4441 AddToWorklist(OptimizedDiv.getNode());
4442 AddToWorklist(Mul.getNode());
4443 return Sub;
4447 // sdiv, srem -> sdivrem / udiv, urem -> udivrem
4448 if (SDValue DivRem = useDivRem(N))
4449 return DivRem.getValue(1);
4451 return SDValue();
4454 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4455 SDValue N0 = N->getOperand(0);
4456 SDValue N1 = N->getOperand(1);
4457 EVT VT = N->getValueType(0);
4458 SDLoc DL(N);
4460 // fold (mulhs c1, c2)
4461 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4462 return C;
4464 // canonicalize constant to RHS.
4465 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4466 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4467 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4469 if (VT.isVector()) {
4470 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4471 return FoldedVOp;
4473 // fold (mulhs x, 0) -> 0
4474 // Do not return N0/N1 because an undef node may exist.
4475 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4476 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4477 return DAG.getConstant(0, DL, VT);
4480 // fold (mulhs x, 0) -> 0
4481 if (isNullConstant(N1))
4482 return N1;
4483 // fold (mulhs x, 1) -> (sra x, size(x)-1)
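// The high half of x * 1 is just the sign extension of x, i.e. copies of its
// sign bit.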
4484 if (isOneConstant(N1))
4485 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4486 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4487 getShiftAmountTy(N0.getValueType())));
4489 // fold (mulhs x, undef) -> 0
4490 if (N0.isUndef() || N1.isUndef())
4491 return DAG.getConstant(0, DL, VT);
4493 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4494 // plus a shift.
4495 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4496 !VT.isVector()) {
4497 MVT Simple = VT.getSimpleVT();
4498 unsigned SimpleSize = Simple.getSizeInBits();
4499 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4500 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4501 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4502 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4503 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4504 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4505 DAG.getConstant(SimpleSize, DL,
4506 getShiftAmountTy(N1.getValueType())));
4507 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4511 return SDValue();
4514 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4515 SDValue N0 = N->getOperand(0);
4516 SDValue N1 = N->getOperand(1);
4517 EVT VT = N->getValueType(0);
4518 SDLoc DL(N);
4520 // fold (mulhu c1, c2)
4521 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4522 return C;
4524 // canonicalize constant to RHS.
4525 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4526 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4527 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4529 if (VT.isVector()) {
4530 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4531 return FoldedVOp;
4533 // fold (mulhu x, 0) -> 0
4534 // Do not return N0/N1 because an undef node may exist.
4535 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4536 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4537 return DAG.getConstant(0, DL, VT);
4540 // fold (mulhu x, 0) -> 0
4541 if (isNullConstant(N1))
4542 return N1;
4543 // fold (mulhu x, 1) -> 0
4544 if (isOneConstant(N1))
4545 return DAG.getConstant(0, DL, N0.getValueType());
4546 // fold (mulhu x, undef) -> 0
4547 if (N0.isUndef() || N1.isUndef())
4548 return DAG.getConstant(0, DL, VT);
4550 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
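// E.g. for i32, (mulhu x, 1 << 20) is the top 32 bits of the 64-bit product
// x * 2^20, which is x >>u 12.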
4551 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4552 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4553 unsigned NumEltBits = VT.getScalarSizeInBits();
4554 SDValue LogBase2 = BuildLogBase2(N1, DL);
4555 SDValue SRLAmt = DAG.getNode(
4556 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4557 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4558 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4559 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4562 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4563 // plus a shift.
4564 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4565 !VT.isVector()) {
4566 MVT Simple = VT.getSimpleVT();
4567 unsigned SimpleSize = Simple.getSizeInBits();
4568 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4569 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4570 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4571 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4572 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4573 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4574 DAG.getConstant(SimpleSize, DL,
4575 getShiftAmountTy(N1.getValueType())));
4576 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4580 // Simplify the operands using demanded-bits information.
4581 // We don't have demanded bits support for MULHU so this just enables constant
4582 // folding based on known bits.
4583 if (SimplifyDemandedBits(SDValue(N, 0)))
4584 return SDValue(N, 0);
4586 return SDValue();
4589 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4590 /// give the opcodes for the two computations that are being performed. Return
4591 /// the combined value if a simplification was made, or an empty SDValue otherwise.
4592 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4593 unsigned HiOp) {
4594 // If the high half is not needed, just compute the low half.
4595 bool HiExists = N->hasAnyUseOfValue(1);
4596 if (!HiExists && (!LegalOperations ||
4597 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4598 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4599 return CombineTo(N, Res, Res);
4602 // If the low half is not needed, just compute the high half.
4603 bool LoExists = N->hasAnyUseOfValue(0);
4604 if (!LoExists && (!LegalOperations ||
4605 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4606 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4607 return CombineTo(N, Res, Res);
4610 // If both halves are used, return as it is.
4611 if (LoExists && HiExists)
4612 return SDValue();
4614 // If the two computed results can be simplified separately, separate them.
4615 if (LoExists) {
4616 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4617 AddToWorklist(Lo.getNode());
4618 SDValue LoOpt = combine(Lo.getNode());
4619 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4620 (!LegalOperations ||
4621 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4622 return CombineTo(N, LoOpt, LoOpt);
4625 if (HiExists) {
4626 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4627 AddToWorklist(Hi.getNode());
4628 SDValue HiOpt = combine(Hi.getNode());
4629 if (HiOpt.getNode() && HiOpt != Hi &&
4630 (!LegalOperations ||
4631 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4632 return CombineTo(N, HiOpt, HiOpt);
4635 return SDValue();
4638 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4639 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4640 return Res;
4642 EVT VT = N->getValueType(0);
4643 SDLoc DL(N);
4645 // If the type twice as wide is legal, transform this multiply to a wider
4646 // multiply plus a shift.
4647 if (VT.isSimple() && !VT.isVector()) {
4648 MVT Simple = VT.getSimpleVT();
4649 unsigned SimpleSize = Simple.getSizeInBits();
4650 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4651 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4652 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4653 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4654 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4655 // Compute the high part as N1.
4656 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4657 DAG.getConstant(SimpleSize, DL,
4658 getShiftAmountTy(Lo.getValueType())));
4659 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4660 // Compute the low part as N0.
4661 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4662 return CombineTo(N, Lo, Hi);
4666 return SDValue();
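// Illustrative example (added for exposition, not part of the original
// source): for i16 operands on a target with a legal i32 multiply,
// (smul_lohi x, y) becomes
//   t  = (mul (sext x to i32), (sext y to i32))
//   hi = (trunc (srl t, 16)), lo = (trunc t).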
4669 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4670 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4671 return Res;
4673 EVT VT = N->getValueType(0);
4674 SDLoc DL(N);
4676 // (umul_lohi N0, 0) -> (0, 0)
4677 if (isNullConstant(N->getOperand(1))) {
4678 SDValue Zero = DAG.getConstant(0, DL, VT);
4679 return CombineTo(N, Zero, Zero);
4682 // (umul_lohi N0, 1) -> (N0, 0)
4683 if (isOneConstant(N->getOperand(1))) {
4684 SDValue Zero = DAG.getConstant(0, DL, VT);
4685 return CombineTo(N, N->getOperand(0), Zero);
4688 // If the type twice as wide is legal, transform the umul_lohi to a wider
4689 // multiply plus a shift.
4690 if (VT.isSimple() && !VT.isVector()) {
4691 MVT Simple = VT.getSimpleVT();
4692 unsigned SimpleSize = Simple.getSizeInBits();
4693 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4694 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4695 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4696 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4697 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4698 // Compute the high part as N1.
4699 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4700 DAG.getConstant(SimpleSize, DL,
4701 getShiftAmountTy(Lo.getValueType())));
4702 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4703 // Compute the low part as N0.
4704 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4705 return CombineTo(N, Lo, Hi);
4709 return SDValue();
4712 SDValue DAGCombiner::visitMULO(SDNode *N) {
4713 SDValue N0 = N->getOperand(0);
4714 SDValue N1 = N->getOperand(1);
4715 EVT VT = N0.getValueType();
4716 bool IsSigned = (ISD::SMULO == N->getOpcode());
4718 EVT CarryVT = N->getValueType(1);
4719 SDLoc DL(N);
4721 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4722 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4724 // fold operation with constant operands.
4725 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4726 // multiple results.
4727 if (N0C && N1C) {
4728 bool Overflow;
4729 APInt Result =
4730 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4731 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4732 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4733 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4736 // canonicalize constant to RHS.
4737 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4738 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4739 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4741 // fold (mulo x, 0) -> 0 + no carry out
4742 if (isNullOrNullSplat(N1))
4743 return CombineTo(N, DAG.getConstant(0, DL, VT),
4744 DAG.getConstant(0, DL, CarryVT));
4746 // (mulo x, 2) -> (addo x, x)
4747 if (N1C && N1C->getAPIntValue() == 2)
4748 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4749 N->getVTList(), N0, N0);
4751 if (IsSigned) {
4752 // A 1 bit SMULO overflows if both inputs are 1.
4753 if (VT.getScalarSizeInBits() == 1) {
4754 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4755 return CombineTo(N, And,
4756 DAG.getSetCC(DL, CarryVT, And,
4757 DAG.getConstant(0, DL, VT), ISD::SETNE));
4760 // Multiplying n * m significant bits yields a result of n + m significant
4761 // bits. If the total number of significant bits does not exceed the
4762 // result bit width (minus 1), there is no overflow.
4763 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4764 if (SignBits > 1)
4765 SignBits += DAG.ComputeNumSignBits(N1);
4766 if (SignBits > VT.getScalarSizeInBits() + 1)
4767 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4768 DAG.getConstant(0, DL, CarryVT));
4769 } else {
4770 KnownBits N1Known = DAG.computeKnownBits(N1);
4771 KnownBits N0Known = DAG.computeKnownBits(N0);
4772 bool Overflow;
4773 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4774 if (!Overflow)
4775 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4776 DAG.getConstant(0, DL, CarryVT));
4779 return SDValue();
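// Illustrative example (added for exposition, not part of the original
// source): (umulo i8 x, 2) is rewritten above to (uaddo i8 x, x); e.g. for
// x = 200 the sum 400 does not fit in 8 bits, so the carry result is set,
// matching the overflow of 200 * 2.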
4782 // Function to calculate whether the Min/Max pair of SDNodes (potentially
4783 // swapped around) make a signed saturate pattern, clamping to between a signed
4784 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
4785 // Returns the node being clamped and the bitwidth of the clamp in BW. Should
4786 // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
4787 // same as SimplifySelectCC. N0<N1 ? N2 : N3.
4788 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
4789 SDValue N3, ISD::CondCode CC, unsigned &BW,
4790 bool &Unsigned) {
4791 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
4792 ISD::CondCode CC) {
4793 // The compare and select operand should be the same or the select operands
4794 // should be truncated versions of the comparison.
4795 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
4796 return 0;
4797 // The constants need to be the same or a truncated version of each other.
4798 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4799 ConstantSDNode *N3C = isConstOrConstSplat(N3);
4800 if (!N1C || !N3C)
4801 return 0;
4802 const APInt &C1 = N1C->getAPIntValue();
4803 const APInt &C2 = N3C->getAPIntValue();
4804 if (C1.getBitWidth() < C2.getBitWidth() ||
4805 C1 != C2.sextOrSelf(C1.getBitWidth()))
4806 return 0;
4807 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
4810 // Check the initial value is a SMIN/SMAX equivalent.
4811 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
4812 if (!Opcode0)
4813 return SDValue();
4815 SDValue N00, N01, N02, N03;
4816 ISD::CondCode N0CC;
4817 switch (N0.getOpcode()) {
4818 case ISD::SMIN:
4819 case ISD::SMAX:
4820 N00 = N02 = N0.getOperand(0);
4821 N01 = N03 = N0.getOperand(1);
4822 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
4823 break;
4824 case ISD::SELECT_CC:
4825 N00 = N0.getOperand(0);
4826 N01 = N0.getOperand(1);
4827 N02 = N0.getOperand(2);
4828 N03 = N0.getOperand(3);
4829 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
4830 break;
4831 case ISD::SELECT:
4832 case ISD::VSELECT:
4833 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
4834 return SDValue();
4835 N00 = N0.getOperand(0).getOperand(0);
4836 N01 = N0.getOperand(0).getOperand(1);
4837 N02 = N0.getOperand(1);
4838 N03 = N0.getOperand(2);
4839 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
4840 break;
4841 default:
4842 return SDValue();
4845 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
4846 if (!Opcode1 || Opcode0 == Opcode1)
4847 return SDValue();
4849 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
4850 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
4851 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
4852 return SDValue();
4854 const APInt &MinC = MinCOp->getAPIntValue();
4855 const APInt &MaxC = MaxCOp->getAPIntValue();
4856 APInt MinCPlus1 = MinC + 1;
4857 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
4858 BW = MinCPlus1.exactLogBase2() + 1;
4859 Unsigned = false;
4860 return N02;
4863 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
4864 BW = MinCPlus1.exactLogBase2();
4865 Unsigned = true;
4866 return N02;
4869 return SDValue();
4872 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
4873 SDValue N3, ISD::CondCode CC,
4874 SelectionDAG &DAG) {
4875 unsigned BW;
4876 bool Unsigned;
4877 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
4878 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
4879 return SDValue();
4880 EVT FPVT = Fp.getOperand(0).getValueType();
4881 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
4882 if (FPVT.isVector())
4883 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
4884 FPVT.getVectorElementCount());
4885 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
4886 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
4887 return SDValue();
4888 SDLoc DL(Fp);
4889 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
4890 DAG.getValueType(NewVT.getScalarType()));
4891 return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
4892 : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
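// Illustrative example (added for exposition, not part of the original
// source): for smin(smax((fp_to_sint f), -128), 127), isSaturatingMinMax
// reports BW = 8 and Unsigned = false (since -(-128) == 127 + 1 == 2^7), and
// the combine above emits an fp_to_sint_sat saturating to i8, sign-extended
// back to the original result type.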
4895 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4896 SDValue N0 = N->getOperand(0);
4897 SDValue N1 = N->getOperand(1);
4898 EVT VT = N0.getValueType();
4899 unsigned Opcode = N->getOpcode();
4900 SDLoc DL(N);
4902 // fold operation with constant operands.
4903 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4904 return C;
4906 // canonicalize constant to RHS
4907 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4908 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4909 return DAG.getNode(Opcode, DL, VT, N1, N0);
4911 // fold vector ops
4912 if (VT.isVector())
4913 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4914 return FoldedVOp;
4916 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4917 // Only do this if the current op isn't legal and the flipped is.
4918 if (!TLI.isOperationLegal(Opcode, VT) &&
4919 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4920 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4921 unsigned AltOpcode;
4922 switch (Opcode) {
4923 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4924 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4925 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4926 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4927 default: llvm_unreachable("Unknown MINMAX opcode");
4929 if (TLI.isOperationLegal(AltOpcode, VT))
4930 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
4933 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
4934 if (SDValue S = PerformMinMaxFpToSatCombine(
4935 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
4936 return S;
4938 // Simplify the operands using demanded-bits information.
4939 if (SimplifyDemandedBits(SDValue(N, 0)))
4940 return SDValue(N, 0);
4942 return SDValue();
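// Illustrative example (added for exposition, not part of the original
// source) of the flip above: if both operands of (smax a, b) are known
// non-negative (sign bit zero), SMAX is not legal, and UMAX is, the node is
// rewritten as (umax a, b), which is equivalent on non-negative values.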
4945 /// If this is a bitwise logic instruction and both operands have the same
4946 /// opcode, try to sink the other opcode after the logic instruction.
4947 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4948 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4949 EVT VT = N0.getValueType();
4950 unsigned LogicOpcode = N->getOpcode();
4951 unsigned HandOpcode = N0.getOpcode();
4952 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4953 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4954 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4956 // Bail early if none of these transforms apply.
4957 if (N0.getNumOperands() == 0)
4958 return SDValue();
4960 // FIXME: We should check number of uses of the operands to not increase
4961 // the instruction count for all transforms.
4963 // Handle size-changing casts.
4964 SDValue X = N0.getOperand(0);
4965 SDValue Y = N1.getOperand(0);
4966 EVT XVT = X.getValueType();
4967 SDLoc DL(N);
4968 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4969 HandOpcode == ISD::SIGN_EXTEND) {
4970 // If both operands have other uses, this transform would create extra
4971 // instructions without eliminating anything.
4972 if (!N0.hasOneUse() && !N1.hasOneUse())
4973 return SDValue();
4974 // We need matching integer source types.
4975 if (XVT != Y.getValueType())
4976 return SDValue();
4977 // Don't create an illegal op during or after legalization. Don't ever
4978 // create an unsupported vector op.
4979 if ((VT.isVector() || LegalOperations) &&
4980 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4981 return SDValue();
4982 // Avoid infinite looping with PromoteIntBinOp.
4983 // TODO: Should we apply desirable/legal constraints to all opcodes?
4984 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4985 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4986 return SDValue();
4987 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4988 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4989 return DAG.getNode(HandOpcode, DL, VT, Logic);
4992 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4993 if (HandOpcode == ISD::TRUNCATE) {
4994 // If both operands have other uses, this transform would create extra
4995 // instructions without eliminating anything.
4996 if (!N0.hasOneUse() && !N1.hasOneUse())
4997 return SDValue();
4998 // We need matching source types.
4999 if (XVT != Y.getValueType())
5000 return SDValue();
5001 // Don't create an illegal op during or after legalization.
5002 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5003 return SDValue();
5004 // Be extra careful sinking truncate. If it's free, there's no benefit in
5005 // widening a binop. Also, don't create a logic op on an illegal type.
5006 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5007 return SDValue();
5008 if (!TLI.isTypeLegal(XVT))
5009 return SDValue();
5010 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5011 return DAG.getNode(HandOpcode, DL, VT, Logic);
5014 // For binops SHL/SRL/SRA/AND:
5015 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5016 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5017 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5018 N0.getOperand(1) == N1.getOperand(1)) {
5019 // If either operand has other uses, this transform is not an improvement.
5020 if (!N0.hasOneUse() || !N1.hasOneUse())
5021 return SDValue();
5022 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5023 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5026 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5027 if (HandOpcode == ISD::BSWAP) {
5028 // If either operand has other uses, this transform is not an improvement.
5029 if (!N0.hasOneUse() || !N1.hasOneUse())
5030 return SDValue();
5031 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5032 return DAG.getNode(HandOpcode, DL, VT, Logic);
5035 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5036 // Only perform this optimization up until type legalization, before
5037 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5038 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5039 // we don't want to undo this promotion.
5040 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5041 // on scalars.
5042 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5043 Level <= AfterLegalizeTypes) {
5044 // Input types must be integer and the same.
5045 if (XVT.isInteger() && XVT == Y.getValueType() &&
5046 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5047 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5048 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5049 return DAG.getNode(HandOpcode, DL, VT, Logic);
5053 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5054 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5055 // If both shuffles use the same mask, and both shuffle within a single
5056 // vector, then it is worthwhile to move the swizzle after the operation.
5057 // The type-legalizer generates this pattern when loading illegal
5058 // vector types from memory. In many cases this allows additional shuffle
5059 // optimizations.
5060 // There are other cases where moving the shuffle after the xor/and/or
5061 // is profitable even if shuffles don't perform a swizzle.
5062 // If both shuffles use the same mask, and both shuffles have the same first
5063 // or second operand, then it might still be profitable to move the shuffle
5064 // after the xor/and/or operation.
5065 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5066 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5067 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5068 assert(X.getValueType() == Y.getValueType() &&
5069 "Inputs to shuffles are not the same type");
5071 // Check that both shuffles use the same mask. The masks are known to be of
5072 // the same length because the result vector type is the same.
5073 // Check also that shuffles have only one use to avoid introducing extra
5074 // instructions.
5075 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5076 !SVN0->getMask().equals(SVN1->getMask()))
5077 return SDValue();
5079 // Don't try to fold this node if it requires introducing a
5080 // build vector of all zeros that might be illegal at this stage.
5081 SDValue ShOp = N0.getOperand(1);
5082 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5083 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5085 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5086 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5087 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5088 N0.getOperand(0), N1.getOperand(0));
5089 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5092 // Don't try to fold this node if it requires introducing a
5093 // build vector of all zeros that might be illegal at this stage.
5094 ShOp = N0.getOperand(0);
5095 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5096 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5098 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5099 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5100 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5101 N1.getOperand(1));
5102 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5106 return SDValue();
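// Illustrative example (added for exposition, not part of the original
// source): the size-changing cast case above turns
// (and (zext i8 a to i32), (zext i8 b to i32)) into
// (zext (and i8 a, b) to i32), provided the one-use and legality checks are
// satisfied, so the logic op is performed in the narrower type.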
5109 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5110 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5111 const SDLoc &DL) {
5112 SDValue LL, LR, RL, RR, N0CC, N1CC;
5113 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5114 !isSetCCEquivalent(N1, RL, RR, N1CC))
5115 return SDValue();
5117 assert(N0.getValueType() == N1.getValueType() &&
5118 "Unexpected operand types for bitwise logic op");
5119 assert(LL.getValueType() == LR.getValueType() &&
5120 RL.getValueType() == RR.getValueType() &&
5121 "Unexpected operand types for setcc");
5123 // If we're here post-legalization or the logic op type is not i1, the logic
5124 // op type must match a setcc result type. Also, all folds require new
5125 // operations on the left and right operands, so those types must match.
5126 EVT VT = N0.getValueType();
5127 EVT OpVT = LL.getValueType();
5128 if (LegalOperations || VT.getScalarType() != MVT::i1)
5129 if (VT != getSetCCResultType(OpVT))
5130 return SDValue();
5131 if (OpVT != RL.getValueType())
5132 return SDValue();
5134 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5135 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5136 bool IsInteger = OpVT.isInteger();
5137 if (LR == RR && CC0 == CC1 && IsInteger) {
5138 bool IsZero = isNullOrNullSplat(LR);
5139 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5141 // All bits clear?
5142 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5143 // All sign bits clear?
5144 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5145 // Any bits set?
5146 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5147 // Any sign bits set?
5148 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5150 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5151 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5152 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5153 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5154 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5155 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5156 AddToWorklist(Or.getNode());
5157 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5160 // All bits set?
5161 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5162 // All sign bits set?
5163 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5164 // Any bits clear?
5165 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5166 // Any sign bits clear?
5167 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5169 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5170 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5171 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5172 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5173 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5174 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5175 AddToWorklist(And.getNode());
5176 return DAG.getSetCC(DL, VT, And, LR, CC1);
5180 // TODO: What is the 'or' equivalent of this fold?
5181 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5182 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5183 IsInteger && CC0 == ISD::SETNE &&
5184 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5185 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5186 SDValue One = DAG.getConstant(1, DL, OpVT);
5187 SDValue Two = DAG.getConstant(2, DL, OpVT);
5188 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5189 AddToWorklist(Add.getNode());
5190 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5193 // Try more general transforms if the predicates match and the only user of
5194 // the compares is the 'and' or 'or'.
5195 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5196 N0.hasOneUse() && N1.hasOneUse()) {
5197 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5198 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5199 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5200 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5201 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5202 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5203 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5204 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5207 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5208 // TODO - support non-uniform vector amounts.
5209 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5210 // Match a shared variable operand and 2 non-opaque constant operands.
5211 ConstantSDNode *C0 = isConstOrConstSplat(LR);
5212 ConstantSDNode *C1 = isConstOrConstSplat(RR);
5213 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5214 const APInt &CMax =
5215 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5216 const APInt &CMin =
5217 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5218 // The difference of the constants must be a single bit.
5219 if ((CMax - CMin).isPowerOf2()) {
5220 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5221 // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5222 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5223 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5224 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5225 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5226 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5227 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5228 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5229 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5235 // Canonicalize equivalent operands to LL == RL.
5236 if (LL == RR && LR == RL) {
5237 CC1 = ISD::getSetCCSwappedOperands(CC1);
5238 std::swap(RL, RR);
5241 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5242 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5243 if (LL == RL && LR == RR) {
5244 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5245 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5246 if (NewCC != ISD::SETCC_INVALID &&
5247 (!LegalOperations ||
5248 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5249 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5250 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5253 return SDValue();
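// Illustrative example (added for exposition, not part of the original
// source) of the one-bit constant-difference fold above:
// (and (setne X, 5), (setne X, 4)) has CMax = 5, CMin = 4 with
// CMax - CMin = 1, and becomes (setne (and (sub X, 4), ~1), 0) on targets
// where convertSetCCLogicToBitwiseLogic returns true.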
5256 /// This contains all DAGCombine rules which reduce two values combined by
5257 /// an And operation to a single value. This makes them reusable in the context
5258 /// of visitSELECT(). Rules involving constants are not included as
5259 /// visitSELECT() already handles those cases.
5260 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5261 EVT VT = N1.getValueType();
5262 SDLoc DL(N);
5264 // fold (and x, undef) -> 0
5265 if (N0.isUndef() || N1.isUndef())
5266 return DAG.getConstant(0, DL, VT);
5268 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5269 return V;
5271 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5272 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5273 VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5274 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5275 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5276 // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
5277 // immediate for an add, but becomes legal once its top c2 bits are set,
5278 // transform the ADD so the immediate doesn't need to be materialized
5279 // in a register.
5280 APInt ADDC = ADDI->getAPIntValue();
5281 APInt SRLC = SRLI->getAPIntValue();
5282 if (ADDC.getMinSignedBits() <= 64 &&
5283 SRLC.ult(VT.getSizeInBits()) &&
5284 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5285 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5286 SRLC.getZExtValue());
5287 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5288 ADDC |= Mask;
5289 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5290 SDLoc DL0(N0);
5291 SDValue NewAdd =
5292 DAG.getNode(ISD::ADD, DL0, VT,
5293 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5294 CombineTo(N0.getNode(), NewAdd);
5295 // Return N so it doesn't get rechecked!
5296 return SDValue(N, 0);
5304 // Reduce bit extract of low half of an integer to the narrower type.
5305 // (and (srl i64:x, K), KMask) ->
5306 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
5307 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5308 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5309 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5310 unsigned Size = VT.getSizeInBits();
5311 const APInt &AndMask = CAnd->getAPIntValue();
5312 unsigned ShiftBits = CShift->getZExtValue();
5314 // Bail out, this node will probably disappear anyway.
5315 if (ShiftBits == 0)
5316 return SDValue();
5318 unsigned MaskBits = AndMask.countTrailingOnes();
5319 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5321 if (AndMask.isMask() &&
5322 // Required bits must not span the two halves of the integer and
5323 // must fit in the half size type.
5324 (ShiftBits + MaskBits <= Size / 2) &&
5325 TLI.isNarrowingProfitable(VT, HalfVT) &&
5326 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5327 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5328 TLI.isTruncateFree(VT, HalfVT) &&
5329 TLI.isZExtFree(HalfVT, VT)) {
5330 // The isNarrowingProfitable is to avoid regressions on PPC and
5331 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5332 // on downstream users of this. Those patterns could probably be
5333 // extended to handle extensions mixed in.
5335 SDLoc SL(N0);
5336 assert(MaskBits <= Size);
5338 // Extracting the highest bit of the low half.
5339 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5340 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5341 N0.getOperand(0));
5343 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5344 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5345 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5346 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5347 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5353 return SDValue();
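// Illustrative example (added for exposition, not part of the original
// source) of the narrowing fold above: on a target whose hooks
// (isNarrowingProfitable etc.) allow it, (and (srl i64:x, 8), 0xFF) becomes
// (zero_extend (and (srl (trunc i64:x to i32), 8), 0xFF) to i64), since the
// extracted byte lies entirely within the low 32 bits.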
5356 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5357 EVT LoadResultTy, EVT &ExtVT) {
5358 if (!AndC->getAPIntValue().isMask())
5359 return false;
5361 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5363 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5364 EVT LoadedVT = LoadN->getMemoryVT();
5366 if (ExtVT == LoadedVT &&
5367 (!LegalOperations ||
5368 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5369 // ZEXTLOAD will match without needing to change the size of the value being
5370 // loaded.
5371 return true;
5374 // Do not change the width of volatile or atomic loads.
5375 if (!LoadN->isSimple())
5376 return false;
5378 // Do not generate loads of non-round integer types since these can
5379 // be expensive (and would be wrong if the type is not byte sized).
5380 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5381 return false;
5383 if (LegalOperations &&
5384 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5385 return false;
5387 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5388 return false;
5390 return true;
5393 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5394 ISD::LoadExtType ExtType, EVT &MemVT,
5395 unsigned ShAmt) {
5396 if (!LDST)
5397 return false;
5398 // Only allow byte offsets.
5399 if (ShAmt % 8)
5400 return false;
5402 // Do not generate loads of non-round integer types since these can
5403 // be expensive (and would be wrong if the type is not byte sized).
5404 if (!MemVT.isRound())
5405 return false;
5407 // Don't change the width of volatile or atomic loads.
5408 if (!LDST->isSimple())
5409 return false;
5411 EVT LdStMemVT = LDST->getMemoryVT();
5413 // Bail out when changing the scalable property, since we can't be sure that
5414 // we're actually narrowing here.
5415 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5416 return false;
5418 // Verify that we are actually reducing a load width here.
5419 if (LdStMemVT.bitsLT(MemVT))
5420 return false;
5422 // Ensure that this isn't going to produce an unsupported memory access.
5423 if (ShAmt) {
5424 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5425 const unsigned ByteShAmt = ShAmt / 8;
5426 const Align LDSTAlign = LDST->getAlign();
5427 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5428 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5429 LDST->getAddressSpace(), NarrowAlign,
5430 LDST->getMemOperand()->getFlags()))
5431 return false;
5434 // It's not possible to generate a constant of extended or untyped type.
5435 EVT PtrType = LDST->getBasePtr().getValueType();
5436 if (PtrType == MVT::Untyped || PtrType.isExtended())
5437 return false;
5439 if (isa<LoadSDNode>(LDST)) {
5440 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5441 // Don't transform one with multiple uses, this would require adding a new
5442 // load.
5443 if (!SDValue(Load, 0).hasOneUse())
5444 return false;
5446 if (LegalOperations &&
5447 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5448 return false;
5450 // For the transform to be legal, the load must produce only two values
5451 // (the value loaded and the chain). Don't transform a pre-increment
5452 // load, for example, which produces an extra value. Otherwise the
5453 // transformation is not equivalent, and the downstream logic to replace
5454 // uses gets things wrong.
5455 if (Load->getNumValues() > 2)
5456 return false;
5458 // If the load that we're shrinking is an extload and we're not just
5459 // discarding the extension we can't simply shrink the load. Bail.
5460 // TODO: It would be possible to merge the extensions in some cases.
5461 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5462 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5463 return false;
5465 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5466 return false;
5467 } else {
5468 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5469 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5470 // Can't write outside the original store
5471 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5472 return false;
5474 if (LegalOperations &&
5475 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5476 return false;
5478 return true;
5481 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5482 SmallVectorImpl<LoadSDNode*> &Loads,
5483 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5484 ConstantSDNode *Mask,
5485 SDNode *&NodeToMask) {
5486 // Recursively search for the operands, looking for loads which can be
5487 // narrowed.
5488 for (SDValue Op : N->op_values()) {
5489 if (Op.getValueType().isVector())
5490 return false;
5492 // Some constants may need fixing up later if they are too large.
5493 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5494 if (Mask->getValueType(0) != C->getValueType(0))
5495 return false;
5496 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5497 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5498 NodesWithConsts.insert(N);
5499 continue;
5502 if (!Op.hasOneUse())
5503 return false;
5505 switch(Op.getOpcode()) {
5506 case ISD::LOAD: {
5507 auto *Load = cast<LoadSDNode>(Op);
5508 EVT ExtVT;
5509 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5510 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5512 // ZEXTLOAD is already small enough.
5513 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5514 ExtVT.bitsGE(Load->getMemoryVT()))
5515 continue;
5517 // Use LE to convert equal sized loads to zext.
5518 if (ExtVT.bitsLE(Load->getMemoryVT()))
5519 Loads.push_back(Load);
5521 continue;
5523 return false;
5525 case ISD::ZERO_EXTEND:
5526 case ISD::AssertZext: {
5527 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5528 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5529 EVT VT = Op.getOpcode() == ISD::AssertZext
5530 ? cast<VTSDNode>(Op.getOperand(1))->getVT()
5531 : Op.getOperand(0).getValueType();
5533 // We can accept extending nodes if the mask is wider than or equal in
5534 // width to the original type.
5535 if (ExtVT.bitsGE(VT))
5536 continue;
5537 break;
5539 case ISD::ANY_EXTEND: {
5540 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5541 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5542 EVT VT = Op.getOperand(0).getValueType();
5543 if (ExtVT.bitsGE(VT))
5544 break;
5545 // Fallthrough to searching for nodes from the operands of the extend.
5546 LLVM_FALLTHROUGH;
5548 case ISD::OR:
5549 case ISD::XOR:
5550 case ISD::AND:
5551 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5552 NodeToMask))
5553 return false;
5554 continue;
5557 // Allow one node which will be masked along with any loads found.
5558 if (NodeToMask)
5559 return false;
5561 // Also ensure that the node to be masked only produces one data result.
5562 NodeToMask = Op.getNode();
5563 if (NodeToMask->getNumValues() > 1) {
5564 bool HasValue = false;
5565 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5566 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5567 if (VT != MVT::Glue && VT != MVT::Other) {
5568 if (HasValue) {
5569 NodeToMask = nullptr;
5570 return false;
5572 HasValue = true;
5575 assert(HasValue && "Node to be masked has no data result?");
5578 return true;
5581 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5582 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5583 if (!Mask)
5584 return false;
5586 if (!Mask->getAPIntValue().isMask())
5587 return false;
5589 // No need to do anything if the and directly uses a load.
5590 if (isa<LoadSDNode>(N->getOperand(0)))
5591 return false;
5593 SmallVector<LoadSDNode*, 8> Loads;
5594 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5595 SDNode *FixupNode = nullptr;
5596 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5597 if (Loads.size() == 0)
5598 return false;
5600 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5601 SDValue MaskOp = N->getOperand(1);
5603 // If it exists, fixup the single node we allow in the tree that needs
5604 // masking.
5605 if (FixupNode) {
5606 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5607 SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode),
5608 FixupNode->getValueType(0));
5609 SDValue And =
5610 DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0),
5611 SDValue(FixupNode, 0), MaskOpT);
5612 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5613 if (And.getOpcode() == ISD::AND)
5614 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT);
5617 // Narrow any constants that need it.
5618 for (auto *LogicN : NodesWithConsts) {
5619 SDValue Op0 = LogicN->getOperand(0);
5620 SDValue Op1 = LogicN->getOperand(1);
5622 if (isa<ConstantSDNode>(Op0))
5623 std::swap(Op0, Op1);
5625 SDValue MaskOpT =
5626 DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType());
5627 SDValue And =
5628 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT);
5630 DAG.UpdateNodeOperands(LogicN, Op0, And);
5633 // Create narrow loads.
5634 for (auto *Load : Loads) {
5635 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5636 SDValue MaskOpT =
5637 DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0));
5638 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5639 SDValue(Load, 0), MaskOpT);
5640 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5641 if (And.getOpcode() == ISD::AND)
5642 And = SDValue(
5643 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0);
5644 SDValue NewLoad = reduceLoadWidth(And.getNode());
5645 assert(NewLoad &&
5646 "Shouldn't be masking the load if it can't be narrowed");
5647 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5649 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5650 return true;
5652 return false;
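// Illustrative example (added for exposition, not part of the original
// source): for (and (or (load i32 p), (load i32 q)), 0xFFFF), the mask is
// propagated back to both loads, which become i16 zextloads where the target
// allows it, and the outer 'and' is then replaced by the 'or' of the
// narrowed loads.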
5655 // Unfold
5656 // x & (-1 'logical shift' y)
5657 // To
5658 // (x 'opposite logical shift' y) 'logical shift' y
5659 // if it is better for performance.
5660 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5661 assert(N->getOpcode() == ISD::AND);
5663 SDValue N0 = N->getOperand(0);
5664 SDValue N1 = N->getOperand(1);
5666 // Do we actually prefer shifts over mask?
5667 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5668 return SDValue();
5670 // Try to match (-1 '[outer] logical shift' y)
5671 unsigned OuterShift;
5672 unsigned InnerShift; // The opposite direction to the OuterShift.
5673 SDValue Y; // Shift amount.
5674 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5675 if (!M.hasOneUse())
5676 return false;
5677 OuterShift = M->getOpcode();
5678 if (OuterShift == ISD::SHL)
5679 InnerShift = ISD::SRL;
5680 else if (OuterShift == ISD::SRL)
5681 InnerShift = ISD::SHL;
5682 else
5683 return false;
5684 if (!isAllOnesConstant(M->getOperand(0)))
5685 return false;
5686 Y = M->getOperand(1);
5687 return true;
5690 SDValue X;
5691 if (matchMask(N1))
5692 X = N0;
5693 else if (matchMask(N0))
5694 X = N1;
5695 else
5696 return SDValue();
5698 SDLoc DL(N);
5699 EVT VT = N->getValueType(0);
5701 // tmp = x 'opposite logical shift' y
5702 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5703 // ret = tmp 'logical shift' y
5704 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5706 return T1;
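// Illustrative example (added for exposition, not part of the original
// source): for i8 values with x = 0b10110110 and y = 3, x & (-1 << y) is
// 0b10110000; the unfolded form (x >> 3) << 3 gives 0b00010110 << 3 =
// 0b10110000, the same result without materializing the (-1 << y) mask.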
5709 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5710 /// For a target with a bit test, this is expected to become test + set and save
5711 /// at least 1 instruction.
5712 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5713 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5715 // This is probably not worthwhile without a supported type.
5716 EVT VT = And->getValueType(0);
5717 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5718 if (!TLI.isTypeLegal(VT))
5719 return SDValue();
5721 // Look through an optional extension and find a 'not'.
5722 // TODO: Should we favor test+set even without the 'not' op?
5723 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5724 if (Not.getOpcode() == ISD::ANY_EXTEND)
5725 Not = Not.getOperand(0);
5726 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5727 return SDValue();
5729 // Look through an optional truncation. The source operand may not be the same
5730 // type as the original 'and', but that is ok because we are masking off
5731 // everything but the low bit.
5732 SDValue Srl = Not.getOperand(0);
5733 if (Srl.getOpcode() == ISD::TRUNCATE)
5734 Srl = Srl.getOperand(0);
5736 // Match a shift-right by constant.
5737 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5738 !isa<ConstantSDNode>(Srl.getOperand(1)))
5739 return SDValue();
5741 // We might have looked through casts that make this transform invalid.
5742 // TODO: If the source type is wider than the result type, do the mask and
5743 // compare in the source type.
5744 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5745 unsigned VTBitWidth = VT.getSizeInBits();
5746 if (ShiftAmt.uge(VTBitWidth))
5747 return SDValue();
5749 // Turn this into a bit-test pattern using mask op + setcc:
5750 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5751 SDLoc DL(And);
5752 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5753 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5754 SDValue Mask = DAG.getConstant(
5755 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5756 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5757 SDValue Zero = DAG.getConstant(0, DL, VT);
5758 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5759 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5762 /// For targets that support usubsat, match a bit-hack form of that operation
5763 /// that ends in 'and' and convert it.
5764 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
5765 SDValue N0 = N->getOperand(0);
5766 SDValue N1 = N->getOperand(1);
5767 EVT VT = N1.getValueType();
5769 // Canonicalize SRA as operand 1.
5770 if (N0.getOpcode() == ISD::SRA)
5771 std::swap(N0, N1);
5773 // xor/add with SMIN (signmask) are logically equivalent.
5774 if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
5775 return SDValue();
5777 if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
5778 N0.getOperand(0) != N1.getOperand(0))
5779 return SDValue();
5781 unsigned BitWidth = VT.getScalarSizeInBits();
5782 ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
5783 ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
5784 if (!XorC || !XorC->getAPIntValue().isSignMask() ||
5785 !SraC || SraC->getAPIntValue() != BitWidth - 1)
5786 return SDValue();
5788 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
5789 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
5790 SDLoc DL(N);
5791 SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
5792 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
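// Illustrative example (added for exposition, not part of the original
// source): for i8 x = 0xD0 (208), (x ^ 0x80) & (x s>> 7) = 0x50 & 0xFF =
// 0x50 = 80, and usubsat(208, 128) = 80; for x = 0x30 (48) the arithmetic
// shift yields 0, matching usubsat(48, 128) = 0.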
5795 SDValue DAGCombiner::visitAND(SDNode *N) {
5796 SDValue N0 = N->getOperand(0);
5797 SDValue N1 = N->getOperand(1);
5798 EVT VT = N1.getValueType();
5800 // x & x --> x
5801 if (N0 == N1)
5802 return N0;
5804 // fold (and c1, c2) -> c1&c2
5805 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5806 return C;
5808 // canonicalize constant to RHS
5809 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5810 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5811 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5813 // fold vector ops
5814 if (VT.isVector()) {
5815 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
5816 return FoldedVOp;
5818 // fold (and x, 0) -> 0, vector edition
5819 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
5820 // do not return N0, because undef node may exist in N0
5821 return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
5822 SDLoc(N), N0.getValueType());
5823 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5824 // do not return N1, because undef node may exist in N1
5825 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
5826 SDLoc(N), N1.getValueType());
5828 // fold (and x, -1) -> x, vector edition
5829 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
5830 return N1;
5831 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5832 return N0;
5834 // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5835 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5836 auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5837 if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5838 N0.hasOneUse() && N1.hasOneUse()) {
5839 EVT LoadVT = MLoad->getMemoryVT();
5840 EVT ExtVT = VT;
5841 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5842 // For this AND to be a zero extension of the masked load the elements
5843 // of the BuildVec must mask the bottom bits of the extended element
5844 // type
5845 if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5846 uint64_t ElementSize =
5847 LoadVT.getVectorElementType().getScalarSizeInBits();
5848 if (Splat->getAPIntValue().isMask(ElementSize)) {
5849 return DAG.getMaskedLoad(
5850 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5851 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5852 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5853 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5860 // fold (and x, -1) -> x
5861 if (isAllOnesConstant(N1))
5862 return N0;
5864 // if (and x, c) is known to be zero, return 0
5865 unsigned BitWidth = VT.getScalarSizeInBits();
5866 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5867 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
5868 return DAG.getConstant(0, SDLoc(N), VT);
5870 if (SDValue NewSel = foldBinOpIntoSelect(N))
5871 return NewSel;
5873 // reassociate and
5874 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5875 return RAND;
5877 // Try to convert a constant mask AND into a shuffle clear mask.
5878 if (VT.isVector())
5879 if (SDValue Shuffle = XformToShuffleWithZero(N))
5880 return Shuffle;
5882 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5883 return Combined;
5885 // fold (and (or x, C), D) -> D if (C & D) == D
5886 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5887 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5889 if (N0.getOpcode() == ISD::OR &&
5890 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5891 return N1;
5892 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5893 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5894 SDValue N0Op0 = N0.getOperand(0);
5895 APInt Mask = ~N1C->getAPIntValue();
5896 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5897 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5898 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5899 N0.getValueType(), N0Op0);
5901 // Replace uses of the AND with uses of the Zero extend node.
5902 CombineTo(N, Zext);
5904 // We actually want to replace all uses of the any_extend with the
5905 // zero_extend, to avoid duplicating things. This will later cause this
5906 // AND to be folded.
5907 CombineTo(N0.getNode(), Zext);
5908 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5912 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5913 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5914 // already be zero by virtue of the width of the base type of the load.
5916 // the 'X' node here can either be nothing or an extract_vector_elt to catch
5917 // more cases.
5918 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5919 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5920 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5921 N0.getOperand(0).getResNo() == 0) ||
5922 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5923 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5924 N0 : N0.getOperand(0) );
5926 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5927 // This can be a pure constant or a vector splat, in which case we treat the
5928 // vector as a scalar and use the splat value.
5929 APInt Constant = APInt::getZero(1);
5930 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5931 Constant = C->getAPIntValue();
5932 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5933 APInt SplatValue, SplatUndef;
5934 unsigned SplatBitSize;
5935 bool HasAnyUndefs;
5936 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5937 SplatBitSize, HasAnyUndefs);
5938 if (IsSplat) {
5939 // Undef bits can contribute to a possible optimisation if set, so
5940 // set them.
5941 SplatValue |= SplatUndef;
5943 // The splat value may be something like "0x00FFFFFF", which means 0 for
5944 // the first vector value and FF for the rest, repeating. We need a mask
5945 // that will apply equally to all members of the vector, so AND all the
5946 // lanes of the constant together.
5947 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5949 // If the splat value has been compressed to a bitlength lower
5950 // than the size of the vector lane, we need to re-expand it to
5951 // the lane size.
5952 if (EltBitWidth > SplatBitSize)
5953 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5954 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5955 SplatValue |= SplatValue.shl(SplatBitSize);
5957 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5958 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5959 if ((SplatBitSize % EltBitWidth) == 0) {
5960 Constant = APInt::getAllOnes(EltBitWidth);
5961 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5962 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5967 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5968 // actually legal and isn't going to get expanded, else this is a false
5969 // optimisation.
5970 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5971 Load->getValueType(0),
5972 Load->getMemoryVT());
5974 // Resize the constant to the same size as the original memory access before
5975 // extension. If it is still the AllOnesValue then this AND is completely
5976 // unneeded.
5977 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5979 bool B;
5980 switch (Load->getExtensionType()) {
5981 default: B = false; break;
5982 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5983 case ISD::ZEXTLOAD:
5984 case ISD::NON_EXTLOAD: B = true; break;
5987 if (B && Constant.isAllOnes()) {
5988 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5989 // preserve semantics once we get rid of the AND.
5990 SDValue NewLoad(Load, 0);
5992 // Fold the AND away. NewLoad may get replaced immediately.
5993 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5995 if (Load->getExtensionType() == ISD::EXTLOAD) {
5996 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5997 Load->getValueType(0), SDLoc(Load),
5998 Load->getChain(), Load->getBasePtr(),
5999 Load->getOffset(), Load->getMemoryVT(),
6000 Load->getMemOperand());
6001 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6002 if (Load->getNumValues() == 3) {
6003 // PRE/POST_INC loads have 3 values.
6004 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6005 NewLoad.getValue(2) };
6006 CombineTo(Load, To, 3, true);
6007 } else {
6008 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6012 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6016 // fold (and (masked_gather x)) -> (zext_masked_gather x)
6017 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6018 EVT MemVT = GN0->getMemoryVT();
6019 EVT ScalarVT = MemVT.getScalarType();
6021 if (SDValue(GN0, 0).hasOneUse() &&
6022 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
6023 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
6024 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
6025 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
6027 SDValue ZExtLoad = DAG.getMaskedGather(
6028 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6029 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6031 CombineTo(N, ZExtLoad);
6032 AddToWorklist(ZExtLoad.getNode());
6033 // Avoid recheck of N.
6034 return SDValue(N, 0);
6038 // fold (and (load x), 255) -> (zextload x, i8)
6039 // fold (and (extload x, i16), 255) -> (zextload x, i8)
6040 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
6041 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
6042 (N0.getOpcode() == ISD::ANY_EXTEND &&
6043 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
6044 if (SDValue Res = reduceLoadWidth(N)) {
6045 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
6046 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
6047 AddToWorklist(N);
6048 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
6049 return SDValue(N, 0);
6053 if (LegalTypes) {
6054 // Attempt to propagate the AND back up to the leaves which, if they're
6055 // loads, can be combined to narrow loads and the AND node can be removed.
6056 // Perform after legalization so that extend nodes will already be
6057 // combined into the loads.
6058 if (BackwardsPropagateMask(N))
6059 return SDValue(N, 0);
6062 if (SDValue Combined = visitANDLike(N0, N1, N))
6063 return Combined;
6065 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
6066 if (N0.getOpcode() == N1.getOpcode())
6067 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6068 return V;
6070 // Masking the negated extension of a boolean is just the zero-extended
6071 // boolean:
6072 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6073 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
6075 // Note: the SimplifyDemandedBits fold below can make an information-losing
6076 // transform, and then we have no way to find this better fold.
6077 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6078 if (isNullOrNullSplat(N0.getOperand(0))) {
6079 SDValue SubRHS = N0.getOperand(1);
6080 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6081 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6082 return SubRHS;
6083 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6084 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6085 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6089 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6090 // fold (and (sra)) -> (and (srl)) when possible.
6091 if (SimplifyDemandedBits(SDValue(N, 0)))
6092 return SDValue(N, 0);
6094 // fold (zext_inreg (extload x)) -> (zextload x)
6095 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6096 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6097 (ISD::isEXTLoad(N0.getNode()) ||
6098 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6099 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6100 EVT MemVT = LN0->getMemoryVT();
6101 // If we zero all the possible extended bits, then we can turn this into
6102 // a zextload if we are running before legalize or the operation is legal.
6103 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6104 unsigned MemBitSize = MemVT.getScalarSizeInBits();
6105 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6106 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6107 ((!LegalOperations && LN0->isSimple()) ||
6108 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6109 SDValue ExtLoad =
6110 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6111 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6112 AddToWorklist(N);
6113 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6114 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6118 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6119 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6120 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6121 N0.getOperand(1), false))
6122 return BSwap;
6125 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6126 return Shifts;
6128 if (TLI.hasBitTest(N0, N1))
6129 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6130 return V;
6132 // Recognize the following pattern:
6134 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6136 // where bitmask is a mask that clears the upper bits of AndVT. The
6137 // number of bits in bitmask must be a power of two.
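// For example, (and (sign_extend i16 X to i32), 0xFFFF): the mask wipes out
// every bit produced by the sign extension, so the whole expression is just
// (zero_extend i16 X to i32).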
6138 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6139 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6140 return false;
6142 auto *C = dyn_cast<ConstantSDNode>(RHS);
6143 if (!C)
6144 return false;
6146 if (!C->getAPIntValue().isMask(
6147 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6148 return false;
6150 return true;
6153 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6154 if (IsAndZeroExtMask(N0, N1))
6155 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6157 if (hasOperation(ISD::USUBSAT, VT))
6158 if (SDValue V = foldAndToUsubsat(N, DAG))
6159 return V;
6161 return SDValue();
6164 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
6165 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6166 bool DemandHighBits) {
6167 if (!LegalOperations)
6168 return SDValue();
6170 EVT VT = N->getValueType(0);
6171 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6172 return SDValue();
6173 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6174 return SDValue();
6176 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6177 bool LookPassAnd0 = false;
6178 bool LookPassAnd1 = false;
6179 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6180 std::swap(N0, N1);
6181 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6182 std::swap(N0, N1);
6183 if (N0.getOpcode() == ISD::AND) {
6184 if (!N0.getNode()->hasOneUse())
6185 return SDValue();
6186 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6187 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6188 // This is needed for X86.
6189 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6190 N01C->getZExtValue() != 0xFFFF))
6191 return SDValue();
6192 N0 = N0.getOperand(0);
6193 LookPassAnd0 = true;
6196 if (N1.getOpcode() == ISD::AND) {
6197 if (!N1.getNode()->hasOneUse())
6198 return SDValue();
6199 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6200 if (!N11C || N11C->getZExtValue() != 0xFF)
6201 return SDValue();
6202 N1 = N1.getOperand(0);
6203 LookPassAnd1 = true;
6206 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6207 std::swap(N0, N1);
6208 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6209 return SDValue();
6210 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6211 return SDValue();
6213 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6214 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6215 if (!N01C || !N11C)
6216 return SDValue();
6217 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6218 return SDValue();
6220 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6221 SDValue N00 = N0->getOperand(0);
6222 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6223 if (!N00.getNode()->hasOneUse())
6224 return SDValue();
6225 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6226 if (!N001C || N001C->getZExtValue() != 0xFF)
6227 return SDValue();
6228 N00 = N00.getOperand(0);
6229 LookPassAnd0 = true;
6232 SDValue N10 = N1->getOperand(0);
6233 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6234 if (!N10.getNode()->hasOneUse())
6235 return SDValue();
6236 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6237 // Also allow 0xFFFF since the bits will be shifted out. This is needed
6238 // for X86.
6239 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6240 N101C->getZExtValue() != 0xFFFF))
6241 return SDValue();
6242 N10 = N10.getOperand(0);
6243 LookPassAnd1 = true;
6246 if (N00 != N10)
6247 return SDValue();
6249 // Make sure everything beyond the low halfword gets set to zero since the
6250 // SRL of the bswap result will clear the top bits.

6251 unsigned OpSizeInBits = VT.getSizeInBits();
6252 if (DemandHighBits && OpSizeInBits > 16) {
6253 // If the left-shift isn't masked out then the only way this is a bswap is
6254 // if all bits beyond the low 8 are 0. In that case the entire pattern
6255 // reduces to a left shift anyway: leave it for other parts of the combiner.
6256 if (!LookPassAnd0)
6257 return SDValue();
6259 // However, if the right shift isn't masked out then it might be because
6260 // it's not needed. See if we can spot that too.
6261 if (!LookPassAnd1 &&
6262 !DAG.MaskedValueIsZero(
6263 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6264 return SDValue();
6267 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6268 if (OpSizeInBits > 16) {
6269 SDLoc DL(N);
6270 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6271 DAG.getConstant(OpSizeInBits - 16, DL,
6272 getShiftAmountTy(VT)));
6274 return Res;
6277 /// Return true if the specified node is an element that makes up a 32-bit
6278 /// packed halfword byteswap.
6279 /// ((x & 0x000000ff) << 8) |
6280 /// ((x & 0x0000ff00) >> 8) |
6281 /// ((x & 0x00ff0000) << 8) |
6282 /// ((x & 0xff000000) >> 8)
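/// On success the node supplying the masked byte is recorded in
/// Parts[MaskByteOffset]; the caller later checks that all four entries name
/// the same source node.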
6283 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6284 if (!N.getNode()->hasOneUse())
6285 return false;
6287 unsigned Opc = N.getOpcode();
6288 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6289 return false;
6291 SDValue N0 = N.getOperand(0);
6292 unsigned Opc0 = N0.getOpcode();
6293 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6294 return false;
6296 ConstantSDNode *N1C = nullptr;
6297 // SHL or SRL: look upstream for AND mask operand
6298 if (Opc == ISD::AND)
6299 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6300 else if (Opc0 == ISD::AND)
6301 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6302 if (!N1C)
6303 return false;
6305 unsigned MaskByteOffset;
6306 switch (N1C->getZExtValue()) {
6307 default:
6308 return false;
6309 case 0xFF: MaskByteOffset = 0; break;
6310 case 0xFF00: MaskByteOffset = 1; break;
6311 case 0xFFFF:
6312 // In case demanded bits didn't clear the bits that will be shifted out.
6313 // This is needed for X86.
6314 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6315 MaskByteOffset = 1;
6316 break;
6318 return false;
6319 case 0xFF0000: MaskByteOffset = 2; break;
6320 case 0xFF000000: MaskByteOffset = 3; break;
6323 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6324 if (Opc == ISD::AND) {
6325 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6326 // (x >> 8) & 0xff
6327 // (x >> 8) & 0xff0000
6328 if (Opc0 != ISD::SRL)
6329 return false;
6330 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6331 if (!C || C->getZExtValue() != 8)
6332 return false;
6333 } else {
6334 // (x << 8) & 0xff00
6335 // (x << 8) & 0xff000000
6336 if (Opc0 != ISD::SHL)
6337 return false;
6338 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6339 if (!C || C->getZExtValue() != 8)
6340 return false;
6342 } else if (Opc == ISD::SHL) {
6343 // (x & 0xff) << 8
6344 // (x & 0xff0000) << 8
6345 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6346 return false;
6347 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6348 if (!C || C->getZExtValue() != 8)
6349 return false;
6350 } else { // Opc == ISD::SRL
6351 // (x & 0xff00) >> 8
6352 // (x & 0xff000000) >> 8
6353 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6354 return false;
6355 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6356 if (!C || C->getZExtValue() != 8)
6357 return false;
6360 if (Parts[MaskByteOffset])
6361 return false;
6363 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6364 return true;
6367 // Match 2 elements of a packed halfword bswap.
6368 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6369 if (N.getOpcode() == ISD::OR)
6370 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6371 isBSwapHWordElement(N.getOperand(1), Parts);
6373 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6374 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6375 if (!C || C->getAPIntValue() != 16)
6376 return false;
6377 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6378 return true;
6381 return false;
6384 // Match this pattern:
6385 // (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
6386 // And rewrite this to:
6387 // (rotr (bswap A), 16)
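// For example, with A = 0x11223344 the two AND terms are 0x22004400 and
// 0x00110033, whose OR is 0x22114433; bswap(A) is 0x44332211 and rotating
// that right by 16 also gives 0x22114433.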
6388 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6389 SelectionDAG &DAG, SDNode *N, SDValue N0,
6390 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6391 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6392 "MatchBSwapHWordOrAndAnd: expecting i32");
6393 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6394 return SDValue();
6395 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6396 return SDValue();
6397 // TODO: this is too restrictive; lifting this restriction requires more tests
6398 if (!N0->hasOneUse() || !N1->hasOneUse())
6399 return SDValue();
6400 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6401 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6402 if (!Mask0 || !Mask1)
6403 return SDValue();
6404 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6405 Mask1->getAPIntValue() != 0x00ff00ff)
6406 return SDValue();
6407 SDValue Shift0 = N0.getOperand(0);
6408 SDValue Shift1 = N1.getOperand(0);
6409 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6410 return SDValue();
6411 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6412 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6413 if (!ShiftAmt0 || !ShiftAmt1)
6414 return SDValue();
6415 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6416 return SDValue();
6417 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6418 return SDValue();
6420 SDLoc DL(N);
6421 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6422 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6423 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6426 /// Match a 32-bit packed halfword bswap. That is
6427 /// ((x & 0x000000ff) << 8) |
6428 /// ((x & 0x0000ff00) >> 8) |
6429 /// ((x & 0x00ff0000) << 8) |
6430 /// ((x & 0xff000000) >> 8)
6431 /// => (rotl (bswap x), 16)
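/// For example, x = 0xAABBCCDD produces 0xBBAADDCC, which is exactly
/// rotl(bswap(x) = 0xDDCCBBAA, 16).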
6432 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6433 if (!LegalOperations)
6434 return SDValue();
6436 EVT VT = N->getValueType(0);
6437 if (VT != MVT::i32)
6438 return SDValue();
6439 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6440 return SDValue();
6442 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6443 getShiftAmountTy(VT)))
6444 return BSwap;
6446 // Try again with commuted operands.
6447 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6448 getShiftAmountTy(VT)))
6449 return BSwap;
6452 // Look for either
6453 // (or (bswaphpair), (bswaphpair))
6454 // (or (or (bswaphpair), (and)), (and))
6455 // (or (or (and), (bswaphpair)), (and))
6456 SDNode *Parts[4] = {};
6458 if (isBSwapHWordPair(N0, Parts)) {
6459 // (or (or (and), (and)), (or (and), (and)))
6460 if (!isBSwapHWordPair(N1, Parts))
6461 return SDValue();
6462 } else if (N0.getOpcode() == ISD::OR) {
6463 // (or (or (or (and), (and)), (and)), (and))
6464 if (!isBSwapHWordElement(N1, Parts))
6465 return SDValue();
6466 SDValue N00 = N0.getOperand(0);
6467 SDValue N01 = N0.getOperand(1);
6468 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6469 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6470 return SDValue();
6471 } else
6472 return SDValue();
6474 // Make sure the parts are all coming from the same node.
6475 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6476 return SDValue();
6478 SDLoc DL(N);
6479 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6480 SDValue(Parts[0], 0));
6482 // Result of the bswap should be rotated by 16. If it's not legal, then
6483 // do (x << 16) | (x >> 16).
6484 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6485 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6486 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6487 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6488 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6489 return DAG.getNode(ISD::OR, DL, VT,
6490 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6491 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6494 /// This contains all DAGCombine rules which reduce two values combined by
6495 /// an Or operation to a single value \see visitANDLike().
6496 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6497 EVT VT = N1.getValueType();
6498 SDLoc DL(N);
6500 // fold (or x, undef) -> -1
6501 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6502 return DAG.getAllOnesConstant(DL, VT);
6504 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6505 return V;
6507 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6508 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6509 // Don't increase # computations.
6510 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6511 // We can only do this xform if we know that bits from X that are set in C2
6512 // but not in C1 are already zero. Likewise for Y.
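// For example, (or (and X, 0xF0), (and Y, 0x0F)) becomes
// (and (or X, Y), 0xFF) when bits 0-3 of X and bits 4-7 of Y are known zero.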
6513 if (const ConstantSDNode *N0O1C =
6514 getAsNonOpaqueConstant(N0.getOperand(1))) {
6515 if (const ConstantSDNode *N1O1C =
6516 getAsNonOpaqueConstant(N1.getOperand(1))) {
6517 // We can only do this xform if we know that bits from X that are set in
6518 // C2 but not in C1 are already zero. Likewise for Y.
6519 const APInt &LHSMask = N0O1C->getAPIntValue();
6520 const APInt &RHSMask = N1O1C->getAPIntValue();
6522 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6523 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6524 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6525 N0.getOperand(0), N1.getOperand(0));
6526 return DAG.getNode(ISD::AND, DL, VT, X,
6527 DAG.getConstant(LHSMask | RHSMask, DL, VT));
6533 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6534 if (N0.getOpcode() == ISD::AND &&
6535 N1.getOpcode() == ISD::AND &&
6536 N0.getOperand(0) == N1.getOperand(0) &&
6537 // Don't increase # computations.
6538 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6539 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6540 N0.getOperand(1), N1.getOperand(1));
6541 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6544 return SDValue();
6547 /// OR combines for which the commuted variant will be tried as well.
6548 static SDValue visitORCommutative(
6549 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6550 EVT VT = N0.getValueType();
6551 if (N0.getOpcode() == ISD::AND) {
6552 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6553 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6554 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6556 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6557 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6558 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6561 return SDValue();
6564 SDValue DAGCombiner::visitOR(SDNode *N) {
6565 SDValue N0 = N->getOperand(0);
6566 SDValue N1 = N->getOperand(1);
6567 EVT VT = N1.getValueType();
6569 // x | x --> x
6570 if (N0 == N1)
6571 return N0;
6573 // fold (or c1, c2) -> c1|c2
6574 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6575 return C;
6577 // canonicalize constant to RHS
6578 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6579 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6580 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6582 // fold vector ops
6583 if (VT.isVector()) {
6584 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6585 return FoldedVOp;
6587 // fold (or x, 0) -> x, vector edition
6588 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6589 return N0;
6591 // fold (or x, -1) -> -1, vector edition
6592 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6594 // do not return N1, because an undef node may exist in N1
6594 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6596 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6597 // Do this only if the resulting shuffle is legal.
6598 if (isa<ShuffleVectorSDNode>(N0) &&
6599 isa<ShuffleVectorSDNode>(N1) &&
6600 // Avoid folding a node with illegal type.
6601 TLI.isTypeLegal(VT)) {
6602 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6603 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6604 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6605 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6606 // Ensure both shuffles have a zero input.
6607 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6608 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6609 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6610 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6611 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6612 bool CanFold = true;
6613 int NumElts = VT.getVectorNumElements();
6614 SmallVector<int, 4> Mask(NumElts);
6616 for (int i = 0; i != NumElts; ++i) {
6617 int M0 = SV0->getMaskElt(i);
6618 int M1 = SV1->getMaskElt(i);
6620 // Determine if either index is pointing to a zero vector.
6621 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6622 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6624 // If one element is zero and the other side is undef, keep undef.
6625 // This also handles the case that both are undef.
6626 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6627 Mask[i] = -1;
6628 continue;
6631 // Make sure only one of the elements is zero.
6632 if (M0Zero == M1Zero) {
6633 CanFold = false;
6634 break;
6637 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6639 // We have a zero and non-zero element. If the non-zero came from
6640 // SV0 make the index a LHS index. If it came from SV1, make it
6641 // a RHS index. We need to mod by NumElts because we don't care
6642 // which operand it came from in the original shuffles.
6643 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6646 if (CanFold) {
6647 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6648 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6650 SDValue LegalShuffle =
6651 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6652 Mask, DAG);
6653 if (LegalShuffle)
6654 return LegalShuffle;
6660 // fold (or x, 0) -> x
6661 if (isNullConstant(N1))
6662 return N0;
6664 // fold (or x, -1) -> -1
6665 if (isAllOnesConstant(N1))
6666 return N1;
6668 if (SDValue NewSel = foldBinOpIntoSelect(N))
6669 return NewSel;
6671 // fold (or x, c) -> c iff (x & ~c) == 0
6672 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6673 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6674 return N1;
6676 if (SDValue Combined = visitORLike(N0, N1, N))
6677 return Combined;
6679 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6680 return Combined;
6682 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl 16)
6683 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6684 return BSwap;
6685 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6686 return BSwap;
6688 // reassociate or
6689 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6690 return ROR;
6692 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6693 // iff (c1 & c2) != 0 or c1/c2 are undef.
6694 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6695 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6697 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6698 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6699 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6700 {N1, N0.getOperand(1)})) {
6701 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6702 AddToWorklist(IOR.getNode());
6703 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6707 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6708 return Combined;
6709 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6710 return Combined;
6712 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6713 if (N0.getOpcode() == N1.getOpcode())
6714 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6715 return V;
6717 // See if this is some rotate idiom.
6718 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6719 return Rot;
6721 if (SDValue Load = MatchLoadCombine(N))
6722 return Load;
6724 // Simplify the operands using demanded-bits information.
6725 if (SimplifyDemandedBits(SDValue(N, 0)))
6726 return SDValue(N, 0);
6728 // If OR can be rewritten into ADD, try combines based on ADD.
6729 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6730 DAG.haveNoCommonBitsSet(N0, N1))
6731 if (SDValue Combined = visitADDLike(N))
6732 return Combined;
6734 return SDValue();
6737 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6738 if (Op.getOpcode() == ISD::AND &&
6739 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6740 Mask = Op.getOperand(1);
6741 return Op.getOperand(0);
6743 return Op;
6746 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6747 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6748 SDValue &Mask) {
6749 Op = stripConstantMask(DAG, Op, Mask);
6750 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6751 Shift = Op;
6752 return true;
6754 return false;
6757 /// Helper function for visitOR to extract the needed side of a rotate idiom
6758 /// from a shl/srl/mul/udiv. This is meant to handle cases where
6759 /// InstCombine merged some outside op with one of the shifts from
6760 /// the rotate pattern.
6761 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6762 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6763 /// patterns:
6765 /// (or (add v v) (srl v bitwidth-1)):
6766 /// expands (add v v) -> (shl v 1)
6768 /// (or (mul v c0) (srl (mul v c1) c2)):
6769 /// expands (mul v c0) -> (shl (mul v c1) c3)
6771 /// (or (udiv v c0) (shl (udiv v c1) c2)):
6772 /// expands (udiv v c0) -> (srl (udiv v c1) c3)
6774 /// (or (shl v c0) (srl (shl v c1) c2)):
6775 /// expands (shl v c0) -> (shl (shl v c1) c3)
6777 /// (or (srl v c0) (shl (srl v c1) c2)):
6778 /// expands (srl v c0) -> (srl (srl v c1) c3)
6780 /// Such that in all cases, c3+c2==bitwidth(op v c1).
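/// For example, for i32: (or (mul v, 8), (srl (mul v, 2), 30)) expands
/// (mul v, 8) into (shl (mul v, 2), 2), since v*8 == (v*2)<<2 and 2+30 == 32,
/// which lets the OR be matched as a rotate of (mul v, 2).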
6781 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6782 SDValue ExtractFrom, SDValue &Mask,
6783 const SDLoc &DL) {
6784 assert(OppShift && ExtractFrom && "Empty SDValue");
6785 assert(
6786 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6787 "Existing shift must be valid as a rotate half");
6789 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6791 // Value and Type of the shift.
6792 SDValue OppShiftLHS = OppShift.getOperand(0);
6793 EVT ShiftedVT = OppShiftLHS.getValueType();
6795 // Amount of the existing shift.
6796 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6798 // (add v v) -> (shl v 1)
6799 // TODO: Should this be a general DAG canonicalization?
6800 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6801 ExtractFrom.getOpcode() == ISD::ADD &&
6802 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6803 ExtractFrom.getOperand(0) == OppShiftLHS &&
6804 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6805 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6806 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6808 // Preconditions:
6809 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6811 // Find opcode of the needed shift to be extracted from (op0 v c0).
6812 unsigned Opcode = ISD::DELETED_NODE;
6813 bool IsMulOrDiv = false;
6814 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6815 // opcode or its arithmetic (mul or udiv) variant.
6816 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6817 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6818 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6819 return false;
6820 Opcode = NeededShift;
6821 return true;
6823 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6824 // that the needed shift can be extracted from.
6825 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6826 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6827 return SDValue();
6829 // op0 must be the same opcode on both sides, have the same LHS argument,
6830 // and produce the same value type.
6831 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6832 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6833 ShiftedVT != ExtractFrom.getValueType())
6834 return SDValue();
6836 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6837 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6838 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6839 ConstantSDNode *ExtractFromCst =
6840 isConstOrConstSplat(ExtractFrom.getOperand(1));
6841 // TODO: We should be able to handle non-uniform constant vectors for these values
6842 // Check that we have constant values.
6843 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6844 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6845 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6846 return SDValue();
6848 // Compute the shift amount we need to extract to complete the rotate.
6849 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6850 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6851 return SDValue();
6852 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6853 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6854 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6855 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6856 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6858 // Now try extract the needed shift from the ExtractFrom op and see if the
6859 // result matches up with the existing shift's LHS op.
6860 if (IsMulOrDiv) {
6861 // Op to extract from is a mul or udiv by a constant.
6862 // Check:
6863 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6864 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6865 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6866 NeededShiftAmt.getZExtValue());
6867 APInt ResultAmt;
6868 APInt Rem;
6869 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6870 if (Rem != 0 || ResultAmt != OppLHSAmt)
6871 return SDValue();
6872 } else {
6873 // Op to extract from is a shift by a constant.
6874 // Check:
6875 // c2 - (bitwidth(op0 v c0) - c1) == c0
6876 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6877 ExtractFromAmt.getBitWidth()))
6878 return SDValue();
6881 // Return the expanded shift op that should allow a rotate to be formed.
6882 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6883 EVT ResVT = ExtractFrom.getValueType();
6884 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6885 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6888 // Return true if we can prove that, whenever Neg and Pos are both in the
6889 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6890 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6892 // (or (shift1 X, Neg), (shift2 X, Pos))
6894 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6895 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
6896 // to consider shift amounts with defined behavior.
6898 // The IsRotate flag should be set when the LHS of both shifts is the same.
6899 // Otherwise if matching a general funnel shift, it should be clear.
6900 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6901 SelectionDAG &DAG, bool IsRotate) {
6902 // If EltSize is a power of 2 then:
6904 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6905 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6907 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6908 // for the stronger condition:
6910 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6912 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6913 // we can just replace Neg with Neg' for the rest of the function.
6915 // In other cases we check for the even stronger condition:
6917 // Neg == EltSize - Pos [B]
6919 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6920 // behavior if Pos == 0 (and consequently Neg == EltSize).
6922 // We could actually use [A] whenever EltSize is a power of 2, but the
6923 // only extra cases that it would match are those uninteresting ones
6924 // where Neg and Pos are never in range at the same time. E.g. for
6925 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6926 // as well as (sub 32, Pos), but:
6928 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6930 // always invokes undefined behavior for 32-bit X.
6932 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6934 // NOTE: We can only do this when matching an AND and not a general
6935 // funnel shift.
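// For example, for EltSize == 32, Neg == (and (sub 0, Pos), 31) satisfies
// condition [A] because (0 - Pos) & 31 == (32 - Pos) & 31 for every Pos.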
6936 unsigned MaskLoBits = 0;
6937 if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6938 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6939 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6940 unsigned Bits = Log2_64(EltSize);
6941 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6942 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6943 Neg = Neg.getOperand(0);
6944 MaskLoBits = Bits;
6949 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6950 if (Neg.getOpcode() != ISD::SUB)
6951 return false;
6952 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6953 if (!NegC)
6954 return false;
6955 SDValue NegOp1 = Neg.getOperand(1);
6957 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6958 // Pos'. The truncation is redundant for the purpose of the equality.
6959 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6960 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6961 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6962 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6963 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6964 MaskLoBits))
6965 Pos = Pos.getOperand(0);
6969 // The condition we need is now:
6971 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6973 // If NegOp1 == Pos then we need:
6975 // EltSize & Mask == NegC & Mask
6977 // (because "x & Mask" is a truncation and distributes through subtraction).
6979 // We also need to account for a potential truncation of NegOp1 if the amount
6980 // has already been legalized to a shift amount type.
6981 APInt Width;
6982 if ((Pos == NegOp1) ||
6983 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6984 Width = NegC->getAPIntValue();
6986 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6987 // Then the condition we want to prove becomes:
6989 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6991 // which, again because "x & Mask" is a truncation, becomes:
6993 // NegC & Mask == (EltSize - PosC) & Mask
6994 // EltSize & Mask == (NegC + PosC) & Mask
6995 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6996 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6997 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6998 else
6999 return false;
7000 } else
7001 return false;
7003 // Now we just need to check that EltSize & Mask == Width & Mask.
7004 if (MaskLoBits)
7005 // EltSize & Mask is 0 since Mask is EltSize - 1.
7006 return Width.getLoBits(MaskLoBits) == 0;
7007 return Width == EltSize;
7010 // A subroutine of MatchRotate used once we have found an OR of two opposite
7011 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
7012 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7013 // former being preferred if supported. InnerPos and InnerNeg are Pos and
7014 // Neg with outer conversions stripped away.
7015 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7016 SDValue Neg, SDValue InnerPos,
7017 SDValue InnerNeg, unsigned PosOpcode,
7018 unsigned NegOpcode, const SDLoc &DL) {
7019 // fold (or (shl x, (*ext y)),
7020 // (srl x, (*ext (sub 32, y)))) ->
7021 // (rotl x, y) or (rotr x, (sub 32, y))
7023 // fold (or (shl x, (*ext (sub 32, y))),
7024 // (srl x, (*ext y))) ->
7025 // (rotr x, y) or (rotl x, (sub 32, y))
7026 EVT VT = Shifted.getValueType();
7027 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
7028 /*IsRotate*/ true)) {
7029 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
7030 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7031 HasPos ? Pos : Neg);
7034 return SDValue();
7037 // A subroutine of MatchRotate used once we have found an OR of two opposite
7038 // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
7039 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
7040 // former being preferred if supported. InnerPos and InnerNeg are Pos and
7041 // Neg with outer conversions stripped away.
7042 // TODO: Merge with MatchRotatePosNeg.
7043 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
7044 SDValue Neg, SDValue InnerPos,
7045 SDValue InnerNeg, unsigned PosOpcode,
7046 unsigned NegOpcode, const SDLoc &DL) {
7047 EVT VT = N0.getValueType();
7048 unsigned EltBits = VT.getScalarSizeInBits();
7050 // fold (or (shl x0, (*ext y)),
7051 // (srl x1, (*ext (sub 32, y)))) ->
7052 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
7054 // fold (or (shl x0, (*ext (sub 32, y))),
7055 // (srl x1, (*ext y))) ->
7056 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
7057 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
7058 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
7059 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
7060 HasPos ? Pos : Neg);
7063 // When matching the shift+xor cases we can't easily use the xor'd shift
7064 // amount, so for now just use the PosOpcode case if it's legal.
7065 // TODO: When can we use the NegOpcode case?
7066 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
7067 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
7068 if (Op.getOpcode() != BinOpc)
7069 return false;
7070 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
7071 return Cst && (Cst->getAPIntValue() == Imm);
7074 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
7075 // -> (fshl x0, x1, y)
7076 if (IsBinOpImm(N1, ISD::SRL, 1) &&
7077 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
7078 InnerPos == InnerNeg.getOperand(0) &&
7079 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
7080 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
7083 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
7084 // -> (fshr x0, x1, y)
7085 if (IsBinOpImm(N0, ISD::SHL, 1) &&
7086 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7087 InnerNeg == InnerPos.getOperand(0) &&
7088 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7089 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7092 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
7093 // -> (fshr x0, x1, y)
7094 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
7095 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
7096 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7097 InnerNeg == InnerPos.getOperand(0) &&
7098 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7099 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7103 return SDValue();
7106 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
7107 // idioms for rotate, and if the target supports rotation instructions, generate
7108 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
7109 // with different shifted sources.
7110 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
7111 EVT VT = LHS.getValueType();
7113 // The target must have at least one rotate/funnel flavor.
7114 // We still try to match rotate by constant pre-legalization.
7115 // TODO: Support pre-legalization funnel-shift by constant.
7116 bool HasROTL = hasOperation(ISD::ROTL, VT);
7117 bool HasROTR = hasOperation(ISD::ROTR, VT);
7118 bool HasFSHL = hasOperation(ISD::FSHL, VT);
7119 bool HasFSHR = hasOperation(ISD::FSHR, VT);
7120 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7121 return SDValue();
7123 // Check for truncated rotate.
7124 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
7125 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
7126 assert(LHS.getValueType() == RHS.getValueType());
7127 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
7128 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
7132 // Match "(X shl/srl V1) & V2" where V2 may not be present.
7133 SDValue LHSShift; // The shift.
7134 SDValue LHSMask; // AND value if any.
7135 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
7137 SDValue RHSShift; // The shift.
7138 SDValue RHSMask; // AND value if any.
7139 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
7141 // If neither side matched a rotate half, bail
7142 if (!LHSShift && !RHSShift)
7143 return SDValue();
7145 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
7146 // side of the rotate, so try to handle that here. In all cases we need to
7147 // pass the matched shift from the opposite side to compute the opcode and
7148 // needed shift amount to extract. We still want to do this if both sides
7149 // matched a rotate half because one half may be a potential overshift that
7150 // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
7151 // single one).
7153 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
7154 if (LHSShift)
7155 if (SDValue NewRHSShift =
7156 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
7157 RHSShift = NewRHSShift;
7158 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
7159 if (RHSShift)
7160 if (SDValue NewLHSShift =
7161 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
7162 LHSShift = NewLHSShift;
7164 // If a side is still missing, nothing else we can do.
7165 if (!RHSShift || !LHSShift)
7166 return SDValue();
7168 // At this point we've matched or extracted a shift op on each side.
7170 if (LHSShift.getOpcode() == RHSShift.getOpcode())
7171 return SDValue(); // Shifts must disagree.
7173 // TODO: Support pre-legalization funnel-shift by constant.
7174 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7175 if (!IsRotate && !(HasFSHL || HasFSHR))
7176 return SDValue(); // Requires funnel shift support.
7178 // Canonicalize shl to left side in a shl/srl pair.
7179 if (RHSShift.getOpcode() == ISD::SHL) {
7180 std::swap(LHS, RHS);
7181 std::swap(LHSShift, RHSShift);
7182 std::swap(LHSMask, RHSMask);
7185 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7186 SDValue LHSShiftArg = LHSShift.getOperand(0);
7187 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7188 SDValue RHSShiftArg = RHSShift.getOperand(0);
7189 SDValue RHSShiftAmt = RHSShift.getOperand(1);
7191 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7192 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7193 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7194 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7195 // iff C1+C2 == EltSizeInBits
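// For example, for i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8),
// since 8 + 24 == 32.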
7196 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7197 ConstantSDNode *RHS) {
7198 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7200 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7201 SDValue Res;
7202 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
7203 bool UseROTL = !LegalOperations || HasROTL;
7204 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7205 UseROTL ? LHSShiftAmt : RHSShiftAmt);
7206 } else {
7207 bool UseFSHL = !LegalOperations || HasFSHL;
7208 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7209 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
7212 // If there is an AND of either shifted operand, apply it to the result.
7213 if (LHSMask.getNode() || RHSMask.getNode()) {
7214 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7215 SDValue Mask = AllOnes;
7217 if (LHSMask.getNode()) {
7218 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7219 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7220 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7222 if (RHSMask.getNode()) {
7223 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7224 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7225 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7228 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7231 return Res;
7234 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
7235 // shift amount.
7236 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7237 return SDValue();
7239 // If there is a mask here, and we have a variable shift, we can't be sure
7240 // that we're masking out the right stuff.
7241 if (LHSMask.getNode() || RHSMask.getNode())
7242 return SDValue();
7244 // If the shift amount is sign/zext/any-extended just peel it off.
7245 SDValue LExtOp0 = LHSShiftAmt;
7246 SDValue RExtOp0 = RHSShiftAmt;
7247 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7248 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7249 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7250 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7251 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7252 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7253 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7254 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7255 LExtOp0 = LHSShiftAmt.getOperand(0);
7256 RExtOp0 = RHSShiftAmt.getOperand(0);
7259 if (IsRotate && (HasROTL || HasROTR)) {
7260 SDValue TryL =
7261 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7262 RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7263 if (TryL)
7264 return TryL;
7266 SDValue TryR =
7267 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7268 LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7269 if (TryR)
7270 return TryR;
7273 SDValue TryL =
7274 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7275 LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7276 if (TryL)
7277 return TryL;
7279 SDValue TryR =
7280 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7281 RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7282 if (TryR)
7283 return TryR;
7285 return SDValue();
7288 namespace {
7292 /// Represents the known origin of an individual byte in a load combine pattern. The
7291 /// value of the byte is either constant zero or comes from memory.
7292 struct ByteProvider {
7293 // For constant zero providers Load is set to nullptr. For memory providers
7294 // Load represents the node which loads the byte from memory.
7295 // ByteOffset is the offset of the byte in the value produced by the load.
7296 LoadSDNode *Load = nullptr;
7297 unsigned ByteOffset = 0;
7299 ByteProvider() = default;
7301 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7302 return ByteProvider(Load, ByteOffset);
7305 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7307 bool isConstantZero() const { return !Load; }
7308 bool isMemory() const { return Load; }
7310 bool operator==(const ByteProvider &Other) const {
7311 return Other.Load == Load && Other.ByteOffset == ByteOffset;
7314 private:
7315 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7316 : Load(Load), ByteOffset(ByteOffset) {}
7319 } // end anonymous namespace
7321 /// Recursively traverses the expression calculating the origin of the requested
7322 /// byte of the given value. Returns None if the provider can't be calculated.
7324 /// For every value except the root of the expression, verifies that the value
7325 /// has exactly one use; if not, returns None. This way, if the origin of the
7326 /// byte is returned, it is guaranteed that the values which contribute to the
7327 /// byte are not used outside of this expression.
7329 /// Because the parts of the expression are not allowed to have more than one
7330 /// use this function iterates over trees, not DAGs. So it never visits the same
7331 /// node more than once.
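/// For example, for i32 (or (zero_extend (load i8 p)), (shl (zero_extend
/// (load i8 q)), 8)), byte 0 is provided by the load from p, byte 1 by the
/// load from q, and bytes 2 and 3 are constant zero from the zero_extends.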
7332 static const Optional<ByteProvider>
7333 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7334 bool Root = false) {
7335 // A typical i64-by-i8 pattern requires recursion up to 8 calls deep
7336 if (Depth == 10)
7337 return None;
7339 if (!Root && !Op.hasOneUse())
7340 return None;
7342 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7343 unsigned BitWidth = Op.getValueSizeInBits();
7344 if (BitWidth % 8 != 0)
7345 return None;
7346 unsigned ByteWidth = BitWidth / 8;
7347 assert(Index < ByteWidth && "invalid index requested");
7348 (void) ByteWidth;
7350 switch (Op.getOpcode()) {
7351 case ISD::OR: {
7352 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7353 if (!LHS)
7354 return None;
7355 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7356 if (!RHS)
7357 return None;
7359 if (LHS->isConstantZero())
7360 return RHS;
7361 if (RHS->isConstantZero())
7362 return LHS;
7363 return None;
7365 case ISD::SHL: {
7366 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7367 if (!ShiftOp)
7368 return None;
7370 uint64_t BitShift = ShiftOp->getZExtValue();
7371 if (BitShift % 8 != 0)
7372 return None;
7373 uint64_t ByteShift = BitShift / 8;
7375 return Index < ByteShift
7376 ? ByteProvider::getConstantZero()
7377 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7378 Depth + 1);
7380 case ISD::ANY_EXTEND:
7381 case ISD::SIGN_EXTEND:
7382 case ISD::ZERO_EXTEND: {
7383 SDValue NarrowOp = Op->getOperand(0);
7384 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7385 if (NarrowBitWidth % 8 != 0)
7386 return None;
7387 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7389 if (Index >= NarrowByteWidth)
7390 return Op.getOpcode() == ISD::ZERO_EXTEND
7391 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7392 : None;
7393 return calculateByteProvider(NarrowOp, Index, Depth + 1);
7395 case ISD::BSWAP:
7396 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7397 Depth + 1);
7398 case ISD::LOAD: {
7399 auto L = cast<LoadSDNode>(Op.getNode());
7400 if (!L->isSimple() || L->isIndexed())
7401 return None;
7403 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7404 if (NarrowBitWidth % 8 != 0)
7405 return None;
7406 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7408 if (Index >= NarrowByteWidth)
7409 return L->getExtensionType() == ISD::ZEXTLOAD
7410 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7411 : None;
7412 return ByteProvider::getMemory(L, Index);
7416 return None;
7419 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7420 return i;
7423 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7424 return BW - i - 1;
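// With these helpers, byte index 1 of a 4-byte value maps to memory offset 1
// in a little-endian layout and to offset 2 in a big-endian layout.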
7427 // Check if the byte offsets we are looking at match either a big-endian or a
7428 // little-endian load of the value. Return true for big endian, false for
7429 // little endian, and None if the match failed.
7430 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7431 int64_t FirstOffset) {
7432 // Endianness can only be decided when there are at least 2 bytes.
7433 unsigned Width = ByteOffsets.size();
7434 if (Width < 2)
7435 return None;
7437 bool BigEndian = true, LittleEndian = true;
7438 for (unsigned i = 0; i < Width; i++) {
7439 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7440 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7441 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7442 if (!BigEndian && !LittleEndian)
7443 return None;
7446 assert((BigEndian != LittleEndian) && "It should be either big endian or "
7447 "little endian");
7448 return BigEndian;
7451 static SDValue stripTruncAndExt(SDValue Value) {
7452 switch (Value.getOpcode()) {
7453 case ISD::TRUNCATE:
7454 case ISD::ZERO_EXTEND:
7455 case ISD::SIGN_EXTEND:
7456 case ISD::ANY_EXTEND:
7457 return stripTruncAndExt(Value.getOperand(0));
7459 return Value;
7462 /// Match a pattern where a wide type scalar value is stored by several narrow
7463 /// stores. Fold it into a single store or a BSWAP and a store if the target
7464 /// supports it.
7466 /// Assuming little endian target:
7467 /// i8 *p = ...
7468 /// i32 val = ...
7469 /// p[0] = (val >> 0) & 0xFF;
7470 /// p[1] = (val >> 8) & 0xFF;
7471 /// p[2] = (val >> 16) & 0xFF;
7472 /// p[3] = (val >> 24) & 0xFF;
7473 /// =>
7474 /// *((i32)p) = val;
7476 /// i8 *p = ...
7477 /// i32 val = ...
7478 /// p[0] = (val >> 24) & 0xFF;
7479 /// p[1] = (val >> 16) & 0xFF;
7480 /// p[2] = (val >> 8) & 0xFF;
7481 /// p[3] = (val >> 0) & 0xFF;
7482 /// =>
7483 /// *((i32)p) = BSWAP(val);
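/// A two-part store with the halves swapped is also recognized; in that case
/// the value is rotated by half its width before the single wide store.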
7484 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7485 // The matching looks for "store (trunc x)" patterns that appear early but are
7486 // likely to be replaced by truncating store nodes during combining.
7487 // TODO: If there is evidence that running this later would help, this
7488 // limitation could be removed. Legality checks may need to be added
7489 // for the created store and optional bswap/rotate.
7490 if (LegalOperations || OptLevel == CodeGenOpt::None)
7491 return SDValue();
7493 // We only handle merging simple stores of 1-4 bytes.
7494 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7495 EVT MemVT = N->getMemoryVT();
7496 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7497 !N->isSimple() || N->isIndexed())
7498 return SDValue();
7500 // Collect all of the stores in the chain.
7501 SDValue Chain = N->getChain();
7502 SmallVector<StoreSDNode *, 8> Stores = {N};
7503 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7504 // All stores must be the same size to ensure that we are writing all of the
7505 // bytes in the wide value.
7506 // TODO: We could allow multiple sizes by tracking each stored byte.
7507 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7508 Store->isIndexed())
7509 return SDValue();
7510 Stores.push_back(Store);
7511 Chain = Store->getChain();
7513 // There is no reason to continue if we do not have at least a pair of stores.
7514 if (Stores.size() < 2)
7515 return SDValue();
7517 // Handle simple types only.
7518 LLVMContext &Context = *DAG.getContext();
7519 unsigned NumStores = Stores.size();
7520 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7521 unsigned WideNumBits = NumStores * NarrowNumBits;
7522 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7523 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7524 return SDValue();
7526 // Check if all bytes of the source value that we are looking at are stored
7527 // to the same base address. Collect offsets from Base address into OffsetMap.
7528 SDValue SourceValue;
7529 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7530 int64_t FirstOffset = INT64_MAX;
7531 StoreSDNode *FirstStore = nullptr;
7532 Optional<BaseIndexOffset> Base;
7533 for (auto Store : Stores) {
7534 // All the stores store different parts of the wide source value. A truncate
7535 // is required to get the partial value.
7536 SDValue Trunc = Store->getValue();
7537 if (Trunc.getOpcode() != ISD::TRUNCATE)
7538 return SDValue();
7539 // Other than the first/last part, a shift operation is required to get the
7540 // offset.
7541 int64_t Offset = 0;
7542 SDValue WideVal = Trunc.getOperand(0);
7543 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7544 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7545 // The shift amount must be a constant multiple of the narrow type.
7546 // It is translated to the offset address in the wide source value "y".
7548 // x = srl y, ShiftAmtC
7549 // i8 z = trunc x
7550 // store z, ...
7551 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7552 if (ShiftAmtC % NarrowNumBits != 0)
7553 return SDValue();
7555 Offset = ShiftAmtC / NarrowNumBits;
7556 WideVal = WideVal.getOperand(0);
7559 // Stores must share the same source value with different offsets.
7560 // Truncate and extends should be stripped to get the single source value.
7561 if (!SourceValue)
7562 SourceValue = WideVal;
7563 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7564 return SDValue();
7565 else if (SourceValue.getValueType() != WideVT) {
7566 if (WideVal.getValueType() == WideVT ||
7567 WideVal.getScalarValueSizeInBits() >
7568 SourceValue.getScalarValueSizeInBits())
7569 SourceValue = WideVal;
7570 // Give up if the source value type is smaller than the store size.
7571 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7572 return SDValue();
7575 // Stores must share the same base address.
7576 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7577 int64_t ByteOffsetFromBase = 0;
7578 if (!Base)
7579 Base = Ptr;
7580 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7581 return SDValue();
7583 // Remember the first store.
7584 if (ByteOffsetFromBase < FirstOffset) {
7585 FirstStore = Store;
7586 FirstOffset = ByteOffsetFromBase;
7588 // Map the offset in the store and the offset in the combined value, and
7589 // early return if it has been set before.
7590 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7591 return SDValue();
7592 OffsetMap[Offset] = ByteOffsetFromBase;
7595 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7596 assert(FirstStore && "First store must be set");
7598 // Check that a store of the wide type is both allowed and fast on the target
7599 const DataLayout &Layout = DAG.getDataLayout();
7600 bool Fast = false;
7601 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7602 *FirstStore->getMemOperand(), &Fast);
7603 if (!Allowed || !Fast)
7604 return SDValue();
7606 // Check if the pieces of the value are going to the expected places in memory
7607 // to merge the stores.
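// For example, four i8 pieces of an i32 on a little-endian target must land at
// offsets 0, 1, 2 and 3 from the first store for pieces 0 through 3.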
7608 auto checkOffsets = [&](bool MatchLittleEndian) {
7609 if (MatchLittleEndian) {
7610 for (unsigned i = 0; i != NumStores; ++i)
7611 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7612 return false;
7613 } else { // MatchBigEndian by reversing loop counter.
7614 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7615 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7616 return false;
7618 return true;
7621 // Check if the offsets line up for the native data layout of this target.
7622 bool NeedBswap = false;
7623 bool NeedRotate = false;
7624 if (!checkOffsets(Layout.isLittleEndian())) {
7625 // Special-case: check if byte offsets line up for the opposite endian.
7626 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7627 NeedBswap = true;
7628 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7629 NeedRotate = true;
7630 else
7631 return SDValue();
7634 SDLoc DL(N);
7635 if (WideVT != SourceValue.getValueType()) {
7636 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7637 "Unexpected store value to merge");
7638 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7641 // Before legalization we can introduce illegal bswaps/rotates which will later
7642 // be converted to an explicit bswap sequence.
7643 // store and byte shuffling instead of several stores and byte shuffling.
7644 if (NeedBswap) {
7645 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7646 } else if (NeedRotate) {
7647 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7648 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7649 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7652 SDValue NewStore =
7653 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7654 FirstStore->getPointerInfo(), FirstStore->getAlign());
7656 // Rely on other DAG combine rules to remove the other individual stores.
7657 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7658 return NewStore;
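// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the scalar equivalence behind the merge above. If the narrow byte stores
// land in the opposite order from the host layout, one wide store of
// BSWAP(Val) writes the same bytes, which is the NeedBswap case.
LLVM_ATTRIBUTE_UNUSED static bool checkMergedTruncStoresSketch(uint32_t Val) {
  // Bytes produced by four truncating stores in big-endian order:
  // offset I receives byte (3 - I) of Val.
  uint8_t Narrow[4];
  for (unsigned I = 0; I != 4; ++I)
    Narrow[I] = uint8_t(Val >> (8 * (3 - I)));
  // Bytes a single little-endian i32 store of BSWAP(Val) would write.
  uint32_t Swapped = (Val >> 24) | ((Val >> 8) & 0xFF00) |
                     ((Val << 8) & 0xFF0000) | (Val << 24);
  for (unsigned I = 0; I != 4; ++I)
    if (Narrow[I] != uint8_t(Swapped >> (8 * I)))
      return false;
  return true;
}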
7661 /// Match a pattern where a wide type scalar value is loaded by several narrow
7662 /// loads and combined by shifts and ors. Fold it into a single load or a load
7663 /// and a BSWAP if the target supports it.
7665 /// Assuming little endian target:
7666 /// i8 *a = ...
7667 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7668 /// =>
7669 /// i32 val = *((i32)a)
7671 /// i8 *a = ...
7672 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7673 /// =>
7674 /// i32 val = BSWAP(*((i32)a))
7676 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7677 /// interact well with the worklist mechanism. When a part of the pattern is
7678 /// updated (e.g. one of the loads), its direct users are put into the worklist,
7679 /// but the root node of the pattern which triggers the load combine is not
7680 /// necessarily a direct user of the changed node. For example, once the address
7681 /// of the t28 load is reassociated, load combine won't be triggered:
7682 /// t25: i32 = add t4, Constant:i32<2>
7683 /// t26: i64 = sign_extend t25
7684 /// t27: i64 = add t2, t26
7685 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7686 /// t29: i32 = zero_extend t28
7687 /// t32: i32 = shl t29, Constant:i8<8>
7688 /// t33: i32 = or t23, t32
7689 /// As a possible fix visitLoad can check if the load can be a part of a load
7690 /// combine pattern and add corresponding OR roots to the worklist.
7691 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7692 assert(N->getOpcode() == ISD::OR &&
7693 "Can only match load combining against OR nodes");
7695 // Handles simple types only
7696 EVT VT = N->getValueType(0);
7697 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7698 return SDValue();
7699 unsigned ByteWidth = VT.getSizeInBits() / 8;
7701 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7702 auto MemoryByteOffset = [&] (ByteProvider P) {
7703 assert(P.isMemory() && "Must be a memory byte provider");
7704 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7705 assert(LoadBitWidth % 8 == 0 &&
7706 "can only analyze providers for individual bytes not bit");
7707 unsigned LoadByteWidth = LoadBitWidth / 8;
7708 return IsBigEndianTarget
7709 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7710 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7713 Optional<BaseIndexOffset> Base;
7714 SDValue Chain;
7716 SmallPtrSet<LoadSDNode *, 8> Loads;
7717 Optional<ByteProvider> FirstByteProvider;
7718 int64_t FirstOffset = INT64_MAX;
7720 // Check if all the bytes of the OR we are looking at are loaded from the same
7721 // base address. Collect bytes offsets from Base address in ByteOffsets.
7722 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7723 unsigned ZeroExtendedBytes = 0;
7724 for (int i = ByteWidth - 1; i >= 0; --i) {
7725 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7726 if (!P)
7727 return SDValue();
7729 if (P->isConstantZero()) {
7730 // It's OK for the N most significant bytes to be 0; we can just
7731 // zero-extend the load.
7732 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7733 return SDValue();
7734 continue;
7736 assert(P->isMemory() && "provenance should either be memory or zero");
7738 LoadSDNode *L = P->Load;
7739 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7740 !L->isIndexed() &&
7741 "Must be enforced by calculateByteProvider");
7742 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7744 // All loads must share the same chain
7745 SDValue LChain = L->getChain();
7746 if (!Chain)
7747 Chain = LChain;
7748 else if (Chain != LChain)
7749 return SDValue();
7751 // Loads must share the same base address
7752 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7753 int64_t ByteOffsetFromBase = 0;
7754 if (!Base)
7755 Base = Ptr;
7756 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7757 return SDValue();
7759 // Calculate the offset of the current byte from the base address
7760 ByteOffsetFromBase += MemoryByteOffset(*P);
7761 ByteOffsets[i] = ByteOffsetFromBase;
7763 // Remember the first byte load
7764 if (ByteOffsetFromBase < FirstOffset) {
7765 FirstByteProvider = P;
7766 FirstOffset = ByteOffsetFromBase;
7769 Loads.insert(L);
7771 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7772 "memory, so there must be at least one load which produces the value");
7773 assert(Base && "Base address of the accessed memory location must be set");
7774 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7776 bool NeedsZext = ZeroExtendedBytes > 0;
7778 EVT MemVT =
7779 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7781 if (!MemVT.isSimple())
7782 return SDValue();
7784 // Before legalize we can introduce too wide illegal loads which will be later
7785 // split into legal sized loads. This enables us to combine i64 load by i8
7786 // patterns to a couple of i32 loads on 32-bit targets.
7787 if (LegalOperations &&
7788 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7789 MemVT))
7790 return SDValue();
7792 // Check if the bytes of the OR we are looking at match with either big or
7793 // little endian value load
7794 Optional<bool> IsBigEndian = isBigEndian(
7795 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7796 if (!IsBigEndian.hasValue())
7797 return SDValue();
7799 assert(FirstByteProvider && "must be set");
7801 // Ensure that the first byte is loaded from zero offset of the first load.
7802 // So the combined value can be loaded from the first load address.
7803 if (MemoryByteOffset(*FirstByteProvider) != 0)
7804 return SDValue();
7805 LoadSDNode *FirstLoad = FirstByteProvider->Load;
7807 // The node we are looking at matches with the pattern, check if we can
7808 // replace it with a single (possibly zero-extended) load and bswap + shift if
7809 // needed.
7811 // If the load needs byte swap check if the target supports it
7812 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7814 // Before legalize we can introduce illegal bswaps which will be later
7815 // converted to an explicit bswap sequence. This way we end up with a single
7816 // load and byte shuffling instead of several loads and byte shuffling.
7817 // We do not introduce illegal bswaps when zero-extending as this tends to
7818 // introduce too many arithmetic instructions.
7819 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7820 !TLI.isOperationLegal(ISD::BSWAP, VT))
7821 return SDValue();
7823 // If we need to bswap and zero extend, we have to insert a shift. Check that
7824 // it is legal.
7825 if (NeedsBswap && NeedsZext && LegalOperations &&
7826 !TLI.isOperationLegal(ISD::SHL, VT))
7827 return SDValue();
7829 // Check that a load of the wide type is both allowed and fast on the target
7830 bool Fast = false;
7831 bool Allowed =
7832 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7833 *FirstLoad->getMemOperand(), &Fast);
7834 if (!Allowed || !Fast)
7835 return SDValue();
7837 SDValue NewLoad =
7838 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7839 Chain, FirstLoad->getBasePtr(),
7840 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7842 // Transfer chain users from old loads to the new load.
7843 for (LoadSDNode *L : Loads)
7844 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7846 if (!NeedsBswap)
7847 return NewLoad;
7849 SDValue ShiftedLoad =
7850 NeedsZext
7851 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7852 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7853 SDLoc(N), LegalOperations))
7854 : NewLoad;
7855 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
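// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the byte arithmetic MatchLoadCombine relies on. Given the bytes a
// little-endian i32 store of Val leaves in memory, OR-ing them back together
// at increasing shift amounts reproduces Val (a plain wide load), and the
// reversed order reproduces BSWAP(Val) (a wide load plus byte swap).
LLVM_ATTRIBUTE_UNUSED static bool checkLoadCombineSketch(uint32_t Val) {
  uint8_t A[4];
  for (unsigned I = 0; I != 4; ++I)
    A[I] = uint8_t(Val >> (8 * I));
  uint32_t LittleEndian = uint32_t(A[0]) | (uint32_t(A[1]) << 8) |
                          (uint32_t(A[2]) << 16) | (uint32_t(A[3]) << 24);
  uint32_t BigEndian = (uint32_t(A[0]) << 24) | (uint32_t(A[1]) << 16) |
                       (uint32_t(A[2]) << 8) | uint32_t(A[3]);
  uint32_t Swapped = (Val >> 24) | ((Val >> 8) & 0xFF00) |
                     ((Val << 8) & 0xFF0000) | (Val << 24);
  return LittleEndian == Val && BigEndian == Swapped;
}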
7858 // If the target has andn, bsl, or a similar bit-select instruction,
7859 // we want to unfold masked merge, with canonical pattern of:
7860 // | A | |B|
7861 // ((x ^ y) & m) ^ y
7862 // | D |
7863 // Into:
7864 // (x & m) | (y & ~m)
7865 // If y is a constant, m is not a 'not', and the 'andn' does not work with
7866 // immediates, we unfold into a different pattern:
7867 // ~(~x & m) & (m | y)
7868 // If x is a constant, m is a 'not', and the 'andn' does not work with
7869 // immediates, we unfold into a different pattern:
7870 // (x | ~m) & ~(~m & ~y)
7871 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7872 // the very least that breaks andnpd / andnps patterns, and because those
7873 // patterns are simplified in IR and shouldn't be created in the DAG
7874 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7875 assert(N->getOpcode() == ISD::XOR);
7877 // Don't touch 'not' (i.e. where y = -1).
7878 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7879 return SDValue();
7881 EVT VT = N->getValueType(0);
7883 // There are 3 commutable operators in the pattern,
7884 // so we have to deal with 8 possible variants of the basic pattern.
7885 SDValue X, Y, M;
7886 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7887 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7888 return false;
7889 SDValue Xor = And.getOperand(XorIdx);
7890 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7891 return false;
7892 SDValue Xor0 = Xor.getOperand(0);
7893 SDValue Xor1 = Xor.getOperand(1);
7894 // Don't touch 'not' (i.e. where y = -1).
7895 if (isAllOnesOrAllOnesSplat(Xor1))
7896 return false;
7897 if (Other == Xor0)
7898 std::swap(Xor0, Xor1);
7899 if (Other != Xor1)
7900 return false;
7901 X = Xor0;
7902 Y = Xor1;
7903 M = And.getOperand(XorIdx ? 0 : 1);
7904 return true;
7907 SDValue N0 = N->getOperand(0);
7908 SDValue N1 = N->getOperand(1);
7909 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7910 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7911 return SDValue();
7913 // Don't do anything if the mask is constant. This should not be reachable.
7914 // InstCombine should have already unfolded this pattern, and DAGCombiner
7915 // probably shouldn't produce it either.
7916 if (isa<ConstantSDNode>(M.getNode()))
7917 return SDValue();
7919 // We can transform if the target has AndNot
7920 if (!TLI.hasAndNot(M))
7921 return SDValue();
7923 SDLoc DL(N);
7925 // If Y is a constant, check that 'andn' works with immediates. Unless M is
7926 // a bitwise not that would already allow ANDN to be used.
7927 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
7928 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7929 // If not, we need to do a bit more work to make sure andn is still used.
7930 SDValue NotX = DAG.getNOT(DL, X, VT);
7931 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7932 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7933 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7934 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7937 // If X is a constant and M is a bitwise not, check that 'andn' works with
7938 // immediates.
7939 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
7940 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
7941 // If not, we need to do a bit more work to make sure andn is still used.
7942 SDValue NotM = M.getOperand(0);
7943 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
7944 SDValue NotY = DAG.getNOT(DL, Y, VT);
7945 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
7946 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
7947 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
7950 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7951 SDValue NotM = DAG.getNOT(DL, M, VT);
7952 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7954 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
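// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the bit-level identities behind unfoldMaskedMerge, checked on 32-bit
// scalars. Each result bit takes X where the mask bit is set and Y where it
// is clear, in all three unfolded forms used above.
LLVM_ATTRIBUTE_UNUSED static bool
checkMaskedMergeSketch(uint32_t X, uint32_t Y, uint32_t M) {
  uint32_t Folded = ((X ^ Y) & M) ^ Y;    // canonical masked merge
  uint32_t Unfolded = (X & M) | (Y & ~M); // preferred form when ANDN exists
  uint32_t ConstYForm = ~(~X & M) & (M | Y);
  uint32_t ConstXForm = (X | ~M) & ~(~M & ~Y);
  return Folded == Unfolded && Folded == ConstYForm && Folded == ConstXForm;
}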
7957 SDValue DAGCombiner::visitXOR(SDNode *N) {
7958 SDValue N0 = N->getOperand(0);
7959 SDValue N1 = N->getOperand(1);
7960 EVT VT = N0.getValueType();
7961 SDLoc DL(N);
7963 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7964 if (N0.isUndef() && N1.isUndef())
7965 return DAG.getConstant(0, DL, VT);
7967 // fold (xor x, undef) -> undef
7968 if (N0.isUndef())
7969 return N0;
7970 if (N1.isUndef())
7971 return N1;
7973 // fold (xor c1, c2) -> c1^c2
7974 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7975 return C;
7977 // canonicalize constant to RHS
7978 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7979 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7980 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7982 // fold vector ops
7983 if (VT.isVector()) {
7984 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7985 return FoldedVOp;
7987 // fold (xor x, 0) -> x, vector edition
7988 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7989 return N0;
7992 // fold (xor x, 0) -> x
7993 if (isNullConstant(N1))
7994 return N0;
7996 if (SDValue NewSel = foldBinOpIntoSelect(N))
7997 return NewSel;
7999 // reassociate xor
8000 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8001 return RXOR;
8003 // fold !(x cc y) -> (x !cc y)
8004 unsigned N0Opcode = N0.getOpcode();
8005 SDValue LHS, RHS, CC;
8006 if (TLI.isConstTrueVal(N1.getNode()) &&
8007 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
8008 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8009 LHS.getValueType());
8010 if (!LegalOperations ||
8011 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8012 switch (N0Opcode) {
8013 default:
8014 llvm_unreachable("Unhandled SetCC Equivalent!");
8015 case ISD::SETCC:
8016 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8017 case ISD::SELECT_CC:
8018 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8019 N0.getOperand(3), NotCC);
8020 case ISD::STRICT_FSETCC:
8021 case ISD::STRICT_FSETCCS: {
8022 if (N0.hasOneUse()) {
8023 // FIXME Can we handle multiple uses? Could we token factor the chain
8024 // results from the new/old setcc?
8025 SDValue SetCC =
8026 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8027 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8028 CombineTo(N, SetCC);
8029 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8030 recursivelyDeleteUnusedNodes(N0.getNode());
8031 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8033 break;
8039 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8040 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8041 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8042 SDValue V = N0.getOperand(0);
8043 SDLoc DL0(N0);
8044 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8045 DAG.getConstant(1, DL0, V.getValueType()));
8046 AddToWorklist(V.getNode());
8047 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8050 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8051 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8052 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8053 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8054 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
8055 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8056 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8057 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8058 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8059 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8062 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8063 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8064 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8065 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8066 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8067 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8068 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8069 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8070 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8071 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8075 // fold (not (neg x)) -> (add X, -1)
8076 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8077 // Y is a constant or the subtract has a single use.
8078 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8079 isNullConstant(N0.getOperand(0))) {
8080 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8081 DAG.getAllOnesConstant(DL, VT));
8084 // fold (not (add X, -1)) -> (neg X)
8085 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8086 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
8087 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8088 N0.getOperand(0));
8091 // fold (xor (and x, y), y) -> (and (not x), y)
8092 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8093 SDValue X = N0.getOperand(0);
8094 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8095 AddToWorklist(NotX.getNode());
8096 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8099 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
8100 ConstantSDNode *XorC = isConstOrConstSplat(N1);
8101 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
8102 unsigned BitWidth = VT.getScalarSizeInBits();
8103 if (XorC && ShiftC) {
8104 // Don't crash on an oversized shift. We cannot guarantee that a bogus
8105 // shift has been simplified to undef.
8106 uint64_t ShiftAmt = ShiftC->getLimitedValue();
8107 if (ShiftAmt < BitWidth) {
8108 APInt Ones = APInt::getAllOnes(BitWidth);
8109 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
8110 if (XorC->getAPIntValue() == Ones) {
8111 // If the xor constant is a shifted -1, do a 'not' before the shift:
8112 // xor (X << ShiftC), XorC --> (not X) << ShiftC
8113 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
8114 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
8115 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
8121 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
8122 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8123 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8124 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8125 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8126 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8127 SDValue S0 = S.getOperand(0);
8128 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8129 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
8130 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8131 return DAG.getNode(ISD::ABS, DL, VT, S0);
8135 // fold (xor x, x) -> 0
8136 if (N0 == N1)
8137 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8139 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8140 // Here is a concrete example of this equivalence:
8141 // i16 x == 14
8142 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
8143 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8145 // =>
8147 // i16 ~1 == 0b1111111111111110
8148 // i16 rol(~1, 14) == 0b1011111111111111
8150 // Some additional tips to help conceptualize this transform:
8151 // - Try to see the operation as placing a single zero in a value of all ones.
8152 // - There exists no value for x which would allow the result to contain zero.
8153 // - Values of x larger than the bitwidth are undefined and do not require a
8154 // consistent result.
8155 // - Pushing the zero left requires shifting one-bits in from the right.
8156 // A rotate left of ~1 is a nice way of achieving the desired result.
8157 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8158 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8159 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8160 N0.getOperand(1));
8163 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
8164 if (N0Opcode == N1.getOpcode())
8165 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8166 return V;
8168 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
8169 if (SDValue MM = unfoldMaskedMerge(N))
8170 return MM;
8172 // Simplify the expression using non-local knowledge.
8173 if (SimplifyDemandedBits(SDValue(N, 0)))
8174 return SDValue(N, 0);
8176 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
8177 return Combined;
8179 return SDValue();
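// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// two of the XOR folds above on concrete 32-bit values. ~(1 << S) equals a
// left-rotate of ~1 by S, and xor (add (X, Y), Y) with Y = (sra X, 31)
// computes the absolute value of X.
LLVM_ATTRIBUTE_UNUSED static bool checkXorFoldSketch(int32_t X, unsigned S) {
  S &= 31; // keep the rotate amount in range
  uint32_t Not1 = ~uint32_t(1);
  uint32_t NotShl = ~(uint32_t(1) << S);
  uint32_t RotNot1 = S == 0 ? Not1 : (Not1 << S) | (Not1 >> (32 - S));
  uint32_t SignMask = X < 0 ? ~uint32_t(0) : 0; // what (sra X, 31) produces
  uint32_t Abs = (uint32_t(X) + SignMask) ^ SignMask;
  uint32_t Expected = X < 0 ? uint32_t(-int64_t(X)) : uint32_t(X);
  return NotShl == RotNot1 && Abs == Expected;
}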
8182 /// If we have a shift-by-constant of a bitwise logic op that itself has a
8183 /// shift-by-constant operand with identical opcode, we may be able to convert
8184 /// that into 2 independent shifts followed by the logic op. This is a
8185 /// throughput improvement.
8186 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8187 // Match a one-use bitwise logic op.
8188 SDValue LogicOp = Shift->getOperand(0);
8189 if (!LogicOp.hasOneUse())
8190 return SDValue();
8192 unsigned LogicOpcode = LogicOp.getOpcode();
8193 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8194 LogicOpcode != ISD::XOR)
8195 return SDValue();
8197 // Find a matching one-use shift by constant.
8198 unsigned ShiftOpcode = Shift->getOpcode();
8199 SDValue C1 = Shift->getOperand(1);
8200 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8201 assert(C1Node && "Expected a shift with constant operand");
8202 const APInt &C1Val = C1Node->getAPIntValue();
8203 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8204 const APInt *&ShiftAmtVal) {
8205 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8206 return false;
8208 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8209 if (!ShiftCNode)
8210 return false;
8212 // Capture the shifted operand and shift amount value.
8213 ShiftOp = V.getOperand(0);
8214 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8216 // Shift amount types do not have to match their operand type, so check that
8217 // the constants are the same width.
8218 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8219 return false;
8221 // The fold is not valid if the sum of the shift values exceeds bitwidth.
8222 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8223 return false;
8225 return true;
8228 // Logic ops are commutative, so check each operand for a match.
8229 SDValue X, Y;
8230 const APInt *C0Val;
8231 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8232 Y = LogicOp.getOperand(1);
8233 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8234 Y = LogicOp.getOperand(0);
8235 else
8236 return SDValue();
8238 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8239 SDLoc DL(Shift);
8240 EVT VT = Shift->getValueType(0);
8241 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8242 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8243 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8244 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8245 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
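// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the scalar form of the rewrite above for SHL of an AND. Shifting
// distributes over the bitwise logic op, so the two shifts of X merge into
// one as long as the amounts do not sum past the bit width.
LLVM_ATTRIBUTE_UNUSED static bool
checkShiftOfShiftedLogicSketch(uint32_t X, uint32_t Y, unsigned C0,
                               unsigned C1) {
  if (C0 + C1 >= 32)
    return true; // the fold above gives up on oversized sums
  uint32_t Before = ((X << C0) & Y) << C1;
  uint32_t After = (X << (C0 + C1)) & (Y << C1);
  return Before == After;
}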
8248 /// Handle transforms common to the three shifts, when the shift amount is a
8249 /// constant.
8250 /// We are looking for: (shift being one of shl/sra/srl)
8251 /// shift (binop X, C0), C1
8252 /// And want to transform into:
8253 /// binop (shift X, C1), (shift C0, C1)
8254 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8255 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8257 // Do not turn a 'not' into a regular xor.
8258 if (isBitwiseNot(N->getOperand(0)))
8259 return SDValue();
8261 // The inner binop must be one-use, since we want to replace it.
8262 SDValue LHS = N->getOperand(0);
8263 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8264 return SDValue();
8266 // TODO: This is limited to early combining because it may reveal regressions
8267 // otherwise. But since we just checked a target hook to see if this is
8268 // desirable, that should have filtered out cases where this interferes
8269 // with some other pattern matching.
8270 if (!LegalTypes)
8271 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8272 return R;
8274 // We want to pull some binops through shifts, so that we have (and (shift))
8275 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8276 // thing happens with address calculations, so it's important to canonicalize
8277 // it.
8278 switch (LHS.getOpcode()) {
8279 default:
8280 return SDValue();
8281 case ISD::OR:
8282 case ISD::XOR:
8283 case ISD::AND:
8284 break;
8285 case ISD::ADD:
8286 if (N->getOpcode() != ISD::SHL)
8287 return SDValue(); // only shl(add) not sr[al](add).
8288 break;
8291 // We require the RHS of the binop to be a constant and not opaque as well.
8292 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8293 if (!BinOpCst)
8294 return SDValue();
8296 // FIXME: disable this unless the input to the binop is a shift by a constant
8297 // or is a copy/select. Enable this in other cases once we can tell exactly when
8298 // it is profitable.
8299 SDValue BinOpLHSVal = LHS.getOperand(0);
8300 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8301 BinOpLHSVal.getOpcode() == ISD::SRA ||
8302 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8303 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8304 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8305 BinOpLHSVal.getOpcode() == ISD::SELECT;
8307 if (!IsShiftByConstant && !IsCopyOrSelect)
8308 return SDValue();
8310 if (IsCopyOrSelect && N->hasOneUse())
8311 return SDValue();
8313 // Fold the constants, shifting the binop RHS by the shift amount.
8314 SDLoc DL(N);
8315 EVT VT = N->getValueType(0);
8316 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8317 N->getOperand(1));
8318 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8320 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8321 N->getOperand(1));
8322 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
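// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the scalar equivalence used above. For SHL, both the logic ops and ADD
// commute with the shift once the constant operand is pre-shifted; for
// SRL/SRA only the logic ops do, which is why ADD is restricted to SHL above.
LLVM_ATTRIBUTE_UNUSED static bool checkShiftOfBinopSketch(uint32_t X,
                                                          uint32_t C0,
                                                          unsigned C1) {
  C1 &= 31;
  bool AndOk = ((X & C0) << C1) == ((X << C1) & (C0 << C1));
  bool AddOk = ((X + C0) << C1) == ((X << C1) + (C0 << C1));
  return AndOk && AddOk;
}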
8325 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8326 assert(N->getOpcode() == ISD::TRUNCATE);
8327 assert(N->getOperand(0).getOpcode() == ISD::AND);
8329 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8330 EVT TruncVT = N->getValueType(0);
8331 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8332 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8333 SDValue N01 = N->getOperand(0).getOperand(1);
8334 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8335 SDLoc DL(N);
8336 SDValue N00 = N->getOperand(0).getOperand(0);
8337 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8338 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8339 AddToWorklist(Trunc00.getNode());
8340 AddToWorklist(Trunc01.getNode());
8341 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8345 return SDValue();
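// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// truncation distributes over AND, here shown for a 64-bit value narrowed to
// 32 bits, which is the scalar fact the rewrite above relies on.
LLVM_ATTRIBUTE_UNUSED static bool checkTruncOfAndSketch(uint64_t X,
                                                        uint64_t C) {
  return uint32_t(X & C) == (uint32_t(X) & uint32_t(C));
}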
8348 SDValue DAGCombiner::visitRotate(SDNode *N) {
8349 SDLoc dl(N);
8350 SDValue N0 = N->getOperand(0);
8351 SDValue N1 = N->getOperand(1);
8352 EVT VT = N->getValueType(0);
8353 unsigned Bitsize = VT.getScalarSizeInBits();
8355 // fold (rot x, 0) -> x
8356 if (isNullOrNullSplat(N1))
8357 return N0;
8359 // fold (rot x, c) -> x iff (c % BitSize) == 0
8360 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8361 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8362 if (DAG.MaskedValueIsZero(N1, ModuloMask))
8363 return N0;
8366 // fold (rot x, c) -> (rot x, c % BitSize)
8367 bool OutOfRange = false;
8368 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8369 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8370 return true;
8372 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8373 EVT AmtVT = N1.getValueType();
8374 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8375 if (SDValue Amt =
8376 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8377 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8380 // rot i16 X, 8 --> bswap X
8381 auto *RotAmtC = isConstOrConstSplat(N1);
8382 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8383 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8384 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8386 // Simplify the operands using demanded-bits information.
8387 if (SimplifyDemandedBits(SDValue(N, 0)))
8388 return SDValue(N, 0);
8390 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8391 if (N1.getOpcode() == ISD::TRUNCATE &&
8392 N1.getOperand(0).getOpcode() == ISD::AND) {
8393 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8394 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8397 unsigned NextOp = N0.getOpcode();
8398 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8399 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8400 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8401 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8402 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8403 EVT ShiftVT = C1->getValueType(0);
8404 bool SameSide = (N->getOpcode() == NextOp);
8405 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8406 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8407 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8408 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8409 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8410 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8411 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8412 CombinedShiftNorm);
8416 return SDValue();
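// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// rotate amounts compose modulo the bit width, and a 16-bit rotate by 8 is a
// byte swap, matching the visitRotate folds above.
LLVM_ATTRIBUTE_UNUSED static uint16_t rotl16Sketch(uint16_t X, unsigned R) {
  R &= 15;
  return R == 0 ? X : uint16_t((X << R) | (X >> (16 - R)));
}
LLVM_ATTRIBUTE_UNUSED static bool
checkRotateFoldSketch(uint16_t X, unsigned C1, unsigned C2) {
  bool ComposeOk = rotl16Sketch(rotl16Sketch(X, C2), C1) ==
                   rotl16Sketch(X, (C1 + C2) % 16);
  bool BswapOk = rotl16Sketch(X, 8) == uint16_t((X << 8) | (X >> 8));
  return ComposeOk && BswapOk;
}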
8419 SDValue DAGCombiner::visitSHL(SDNode *N) {
8420 SDValue N0 = N->getOperand(0);
8421 SDValue N1 = N->getOperand(1);
8422 if (SDValue V = DAG.simplifyShift(N0, N1))
8423 return V;
8425 EVT VT = N0.getValueType();
8426 EVT ShiftVT = N1.getValueType();
8427 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8429 // fold (shl c1, c2) -> c1<<c2
8430 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8431 return C;
8433 // fold vector ops
8434 if (VT.isVector()) {
8435 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8436 return FoldedVOp;
8438 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8439 // If setcc produces all-one true value then:
8440 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8441 if (N1CV && N1CV->isConstant()) {
8442 if (N0.getOpcode() == ISD::AND) {
8443 SDValue N00 = N0->getOperand(0);
8444 SDValue N01 = N0->getOperand(1);
8445 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8447 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8448 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8449 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8450 if (SDValue C =
8451 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8452 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8458 if (SDValue NewSel = foldBinOpIntoSelect(N))
8459 return NewSel;
8461 // if (shl x, c) is known to be zero, return 0
8462 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8463 return DAG.getConstant(0, SDLoc(N), VT);
8465 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8466 if (N1.getOpcode() == ISD::TRUNCATE &&
8467 N1.getOperand(0).getOpcode() == ISD::AND) {
8468 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8469 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8472 if (SimplifyDemandedBits(SDValue(N, 0)))
8473 return SDValue(N, 0);
8475 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8476 if (N0.getOpcode() == ISD::SHL) {
8477 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8478 ConstantSDNode *RHS) {
8479 APInt c1 = LHS->getAPIntValue();
8480 APInt c2 = RHS->getAPIntValue();
8481 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8482 return (c1 + c2).uge(OpSizeInBits);
8484 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8485 return DAG.getConstant(0, SDLoc(N), VT);
8487 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8488 ConstantSDNode *RHS) {
8489 APInt c1 = LHS->getAPIntValue();
8490 APInt c2 = RHS->getAPIntValue();
8491 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8492 return (c1 + c2).ult(OpSizeInBits);
8494 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8495 SDLoc DL(N);
8496 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8497 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8501 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8502 // For this to be valid, the second form must not preserve any of the bits
8503 // that are shifted out by the inner shift in the first form. This means
8504 // the outer shift size must be >= the number of bits added by the ext.
8505 // As a corollary, we don't care what kind of ext it is.
8506 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8507 N0.getOpcode() == ISD::ANY_EXTEND ||
8508 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8509 N0.getOperand(0).getOpcode() == ISD::SHL) {
8510 SDValue N0Op0 = N0.getOperand(0);
8511 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8512 EVT InnerVT = N0Op0.getValueType();
8513 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8515 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8516 ConstantSDNode *RHS) {
8517 APInt c1 = LHS->getAPIntValue();
8518 APInt c2 = RHS->getAPIntValue();
8519 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8520 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8521 (c1 + c2).uge(OpSizeInBits);
8523 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8524 /*AllowUndefs*/ false,
8525 /*AllowTypeMismatch*/ true))
8526 return DAG.getConstant(0, SDLoc(N), VT);
8528 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8529 ConstantSDNode *RHS) {
8530 APInt c1 = LHS->getAPIntValue();
8531 APInt c2 = RHS->getAPIntValue();
8532 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8533 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8534 (c1 + c2).ult(OpSizeInBits);
8536 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8537 /*AllowUndefs*/ false,
8538 /*AllowTypeMismatch*/ true)) {
8539 SDLoc DL(N);
8540 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8541 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8542 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8543 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8547 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8548 // Only fold this if the inner zext has no other uses to avoid increasing
8549 // the total number of instructions.
8550 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8551 N0.getOperand(0).getOpcode() == ISD::SRL) {
8552 SDValue N0Op0 = N0.getOperand(0);
8553 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8555 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8556 APInt c1 = LHS->getAPIntValue();
8557 APInt c2 = RHS->getAPIntValue();
8558 zeroExtendToMatch(c1, c2);
8559 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8561 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8562 /*AllowUndefs*/ false,
8563 /*AllowTypeMismatch*/ true)) {
8564 SDLoc DL(N);
8565 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8566 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8567 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8568 AddToWorklist(NewSHL.getNode());
8569 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8573 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8574 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
8575 // TODO - support non-uniform vector shift amounts.
8576 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8577 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8578 N0->getFlags().hasExact()) {
8579 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8580 uint64_t C1 = N0C1->getZExtValue();
8581 uint64_t C2 = N1C->getZExtValue();
8582 SDLoc DL(N);
8583 if (C1 <= C2)
8584 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8585 DAG.getConstant(C2 - C1, DL, ShiftVT));
8586 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8587 DAG.getConstant(C1 - C2, DL, ShiftVT));
8591 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8592 //                               (and (srl x, (sub c1, c2)), MASK)
8593 // Only fold this if the inner shift has no other uses -- if it does, folding
8594 // this will increase the total number of instructions.
8595 // TODO - drop hasOneUse requirement if c1 == c2?
8596 // TODO - support non-uniform vector shift amounts.
8597 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8598 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8599 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8600 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8601 uint64_t c1 = N0C1->getZExtValue();
8602 uint64_t c2 = N1C->getZExtValue();
8603 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8604 SDValue Shift;
8605 if (c2 > c1) {
8606 Mask <<= c2 - c1;
8607 SDLoc DL(N);
8608 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8609 DAG.getConstant(c2 - c1, DL, ShiftVT));
8610 } else {
8611 Mask.lshrInPlace(c1 - c2);
8612 SDLoc DL(N);
8613 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8614 DAG.getConstant(c1 - c2, DL, ShiftVT));
8616 SDLoc DL(N0);
8617 return DAG.getNode(ISD::AND, DL, VT, Shift,
8618 DAG.getConstant(Mask, DL, VT));
8623 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8624 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8625 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8626 SDLoc DL(N);
8627 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8628 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8629 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8632 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8633 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8634 // Variant of the version done on multiply, except that a mul by a power of 2
8635 // is turned into a shift.
8636 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8637 N0.getNode()->hasOneUse() &&
8638 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8639 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8640 TLI.isDesirableToCommuteWithShift(N, Level)) {
8641 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8642 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8643 AddToWorklist(Shl0.getNode());
8644 AddToWorklist(Shl1.getNode());
8645 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8648 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8649 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8650 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8651 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8652 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8653 if (isConstantOrConstantVector(Shl))
8654 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8657 if (N1C && !N1C->isOpaque())
8658 if (SDValue NewSHL = visitShiftByConstant(N))
8659 return NewSHL;
8661 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8662 if (N0.getOpcode() == ISD::VSCALE)
8663 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8664 const APInt &C0 = N0.getConstantOperandAPInt(0);
8665 const APInt &C1 = NC1->getAPIntValue();
8666 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8669 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8670 APInt ShlVal;
8671 if (N0.getOpcode() == ISD::STEP_VECTOR)
8672 if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8673 const APInt &C0 = N0.getConstantOperandAPInt(0);
8674 if (ShlVal.ult(C0.getBitWidth())) {
8675 APInt NewStep = C0 << ShlVal;
8676 return DAG.getStepVector(SDLoc(N), VT, NewStep);
8680 return SDValue();
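// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// two of the SHL folds above on 32-bit scalars. Stacked left shifts sum
// their amounts (or collapse to zero once the sum reaches the bit width),
// and (shl (srl X, C), C) merely clears the low C bits.
LLVM_ATTRIBUTE_UNUSED static bool checkShlFoldSketch(uint32_t X, unsigned C1,
                                                     unsigned C2) {
  C1 &= 31;
  C2 &= 31;
  uint32_t Stacked = (X << C1) << C2;
  uint32_t Folded = C1 + C2 >= 32 ? 0 : X << (C1 + C2);
  uint32_t MaskForm = (X >> C1) << C1; // == X & (-1 << C1)
  return Stacked == Folded && MaskForm == (X & (~uint32_t(0) << C1));
}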
8683 // Transform a right shift of a multiply into a multiply-high.
8684 // Examples:
8685 // (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
8686 // (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
8687 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8688 const TargetLowering &TLI) {
8689 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8690 "SRL or SRA node is required here!");
8692 // Check the shift amount. Proceed with the transformation if the shift
8693 // amount is constant.
8694 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8695 if (!ShiftAmtSrc)
8696 return SDValue();
8698 SDLoc DL(N);
8700 // The operation feeding into the shift must be a multiply.
8701 SDValue ShiftOperand = N->getOperand(0);
8702 if (ShiftOperand.getOpcode() != ISD::MUL)
8703 return SDValue();
8705 // Both operands must be equivalent extend nodes.
8706 SDValue LeftOp = ShiftOperand.getOperand(0);
8707 SDValue RightOp = ShiftOperand.getOperand(1);
8709 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8710 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8712 if (!IsSignExt && !IsZeroExt)
8713 return SDValue();
8715 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8716 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8718 SDValue MulhRightOp;
8719 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
8720 unsigned ActiveBits = IsSignExt
8721 ? Constant->getAPIntValue().getMinSignedBits()
8722 : Constant->getAPIntValue().getActiveBits();
8723 if (ActiveBits > NarrowVTSize)
8724 return SDValue();
8725 MulhRightOp = DAG.getConstant(
8726 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
8727 NarrowVT);
8728 } else {
8729 if (LeftOp.getOpcode() != RightOp.getOpcode())
8730 return SDValue();
8731 // Check that the two extend nodes are the same type.
8732 if (NarrowVT != RightOp.getOperand(0).getValueType())
8733 return SDValue();
8734 MulhRightOp = RightOp.getOperand(0);
8737 EVT WideVT = LeftOp.getValueType();
8738 // Proceed with the transformation if the wide types match.
8739 assert((WideVT == RightOp.getValueType()) &&
8740 "Cannot have a multiply node with two different operand types.");
8742 // Proceed with the transformation if the wide type is twice as large
8743 // as the narrow type.
8744 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
8745 return SDValue();
8747 // Check the shift amount with the narrow type size.
8748 // Proceed with the transformation if the shift amount is the width
8749 // of the narrow type.
8750 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8751 if (ShiftAmt != NarrowVTSize)
8752 return SDValue();
8754 // If the operation feeding into the MUL is a sign extend (sext),
8755 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8756 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8758 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8759 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8760 return SDValue();
8762 SDValue Result =
8763 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
8764 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
8765 : DAG.getZExtOrTrunc(Result, DL, WideVT));
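// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the scalar meaning of the rewrite above for the unsigned case. The high 32
// bits of the widened product are exactly what MULHU of the narrow operands
// returns, and the low 32 bits are the ordinary narrow multiply, so the
// shift-by-32 keeps only the MULHU half.
LLVM_ATTRIBUTE_UNUSED static bool checkMulhSketch(uint32_t A, uint32_t B) {
  uint64_t Wide = uint64_t(A) * uint64_t(B); // (mul (zext A), (zext B))
  uint32_t High = uint32_t(Wide >> 32);      // value of (srl Wide, 32)
  uint32_t Low = uint32_t(Wide);
  return ((uint64_t(High) << 32) | Low) == Wide && Low == A * B;
}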
8768 SDValue DAGCombiner::visitSRA(SDNode *N) {
8769 SDValue N0 = N->getOperand(0);
8770 SDValue N1 = N->getOperand(1);
8771 if (SDValue V = DAG.simplifyShift(N0, N1))
8772 return V;
8774 EVT VT = N0.getValueType();
8775 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8777 // fold (sra c1, c2) -> c1 >>s c2
8778 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8779 return C;
8781 // Arithmetic shifting an all-sign-bit value is a no-op.
8782 // fold (sra 0, x) -> 0
8783 // fold (sra -1, x) -> -1
8784 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8785 return N0;
8787 // fold vector ops
8788 if (VT.isVector())
8789 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8790 return FoldedVOp;
8792 if (SDValue NewSel = foldBinOpIntoSelect(N))
8793 return NewSel;
8795 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8796 // sext_inreg.
8797 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8798 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8799 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8800 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8801 if (VT.isVector())
8802 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8803 VT.getVectorElementCount());
8804 if (!LegalOperations ||
8805 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8806 TargetLowering::Legal)
8807 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8808 N0.getOperand(0), DAG.getValueType(ExtVT));
8809 // Even if we can't convert to sext_inreg, we might be able to remove
8810 // this shift pair if the input is already sign extended.
8811 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8812 return N0.getOperand(0);
8815 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8816 // clamp (add c1, c2) to max shift.
8817 if (N0.getOpcode() == ISD::SRA) {
8818 SDLoc DL(N);
8819 EVT ShiftVT = N1.getValueType();
8820 EVT ShiftSVT = ShiftVT.getScalarType();
8821 SmallVector<SDValue, 16> ShiftValues;
8823 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8824 APInt c1 = LHS->getAPIntValue();
8825 APInt c2 = RHS->getAPIntValue();
8826 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8827 APInt Sum = c1 + c2;
8828 unsigned ShiftSum =
8829 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8830 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8831 return true;
8833 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8834 SDValue ShiftValue;
8835 if (N1.getOpcode() == ISD::BUILD_VECTOR)
8836 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8837 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8838 assert(ShiftValues.size() == 1 &&
8839 "Expected matchBinaryPredicate to return one element for "
8840 "SPLAT_VECTORs");
8841 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8842 } else
8843 ShiftValue = ShiftValues[0];
8844 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8848 // fold (sra (shl X, m), (sub result_size, n))
8849 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8850 // result_size - n != m.
8851 // If truncate is free for the target sext(shl) is likely to result in better
8852 // code.
8853 if (N0.getOpcode() == ISD::SHL && N1C) {
8854 // Get the two constants of the shifts, CN0 = m, CN = n.
8855 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8856 if (N01C) {
8857 LLVMContext &Ctx = *DAG.getContext();
8858 // Determine what the truncate's result bitsize and type would be.
8859 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8861 if (VT.isVector())
8862 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8864 // Determine the residual right-shift amount.
8865 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8867 // If the shift is not a no-op (in which case this should be just a sign
8868 // extend already), the truncate-to type is legal, sign_extend is legal
8869 // on that type, and the truncate to that type is both legal and free,
8870 // perform the transform.
8871 if ((ShiftAmt > 0) &&
8872 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8873 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8874 TLI.isTruncateFree(VT, TruncVT)) {
8875 SDLoc DL(N);
8876 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8877 getShiftAmountTy(N0.getOperand(0).getValueType()));
8878 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8879 N0.getOperand(0), Amt);
8880 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8881 Shift);
8882 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8883 N->getValueType(0), Trunc);
8888 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8889 // sra (add (shl X, N1C), AddC), N1C -->
8890 // sext (add (trunc X to (width - N1C)), AddC')
8891 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8892 N0.getOperand(0).getOpcode() == ISD::SHL &&
8893 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8894 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8895 SDValue Shl = N0.getOperand(0);
8896 // Determine what the truncate's type would be and ask the target if that
8897 // is a free operation.
8898 LLVMContext &Ctx = *DAG.getContext();
8899 unsigned ShiftAmt = N1C->getZExtValue();
8900 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8901 if (VT.isVector())
8902 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8904 // TODO: The simple type check probably belongs in the default hook
8905 // implementation and/or target-specific overrides (because
8906 // non-simple types likely require masking when legalized), but that
8907 // restriction may conflict with other transforms.
8908 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8909 TLI.isTruncateFree(VT, TruncVT)) {
8910 SDLoc DL(N);
8911 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8912 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8913 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8914 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8915 return DAG.getSExtOrTrunc(Add, DL, VT);
8920 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8921 if (N1.getOpcode() == ISD::TRUNCATE &&
8922 N1.getOperand(0).getOpcode() == ISD::AND) {
8923 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8924 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8927 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8928 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8929 // if c1 is equal to the number of bits the trunc removes
8930 // TODO - support non-uniform vector shift amounts.
8931 if (N0.getOpcode() == ISD::TRUNCATE &&
8932 (N0.getOperand(0).getOpcode() == ISD::SRL ||
8933 N0.getOperand(0).getOpcode() == ISD::SRA) &&
8934 N0.getOperand(0).hasOneUse() &&
8935 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8936 SDValue N0Op0 = N0.getOperand(0);
8937 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8938 EVT LargeVT = N0Op0.getValueType();
8939 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8940 if (LargeShift->getAPIntValue() == TruncBits) {
8941 SDLoc DL(N);
8942 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8943 getShiftAmountTy(LargeVT));
8944 SDValue SRA =
8945 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8946 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8951 // Simplify, based on bits shifted out of the LHS.
8952 if (SimplifyDemandedBits(SDValue(N, 0)))
8953 return SDValue(N, 0);
8955 // If the sign bit is known to be zero, switch this to a SRL.
8956 if (DAG.SignBitIsZero(N0))
8957 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8959 if (N1C && !N1C->isOpaque())
8960 if (SDValue NewSRA = visitShiftByConstant(N))
8961 return NewSRA;
8963 // Try to transform this shift into a multiply-high if
8964 // it matches the appropriate pattern detected in combineShiftToMULH.
8965 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8966 return MULH;
8968 return SDValue();
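// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// (sra (shl X, C), C) is sign-extension from the low (32 - C) bits, matching
// the sext_inreg fold above. Shown for C == 24, assuming the usual
// two's-complement conversions and an arithmetic signed right shift on the
// host (true for all supported hosts).
LLVM_ATTRIBUTE_UNUSED static bool checkSraOfShlSketch(uint32_t X) {
  int32_t ShiftPair = int32_t(X << 24) >> 24;      // sra (shl X, 24), 24
  int32_t SextInReg = int32_t(int8_t(uint8_t(X))); // sign_extend_inreg to i8
  return ShiftPair == SextInReg;
}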
8971 SDValue DAGCombiner::visitSRL(SDNode *N) {
8972 SDValue N0 = N->getOperand(0);
8973 SDValue N1 = N->getOperand(1);
8974 if (SDValue V = DAG.simplifyShift(N0, N1))
8975 return V;
8977 EVT VT = N0.getValueType();
8978 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8980 // fold (srl c1, c2) -> c1 >>u c2
8981 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8982 return C;
8984 // fold vector ops
8985 if (VT.isVector())
8986 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8987 return FoldedVOp;
8989 if (SDValue NewSel = foldBinOpIntoSelect(N))
8990 return NewSel;
8992 // if (srl x, c) is known to be zero, return 0
8993 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8994 if (N1C &&
8995 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8996 return DAG.getConstant(0, SDLoc(N), VT);
8998 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8999 if (N0.getOpcode() == ISD::SRL) {
9000 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9001 ConstantSDNode *RHS) {
9002 APInt c1 = LHS->getAPIntValue();
9003 APInt c2 = RHS->getAPIntValue();
9004 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9005 return (c1 + c2).uge(OpSizeInBits);
9007 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9008 return DAG.getConstant(0, SDLoc(N), VT);
9010 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9011 ConstantSDNode *RHS) {
9012 APInt c1 = LHS->getAPIntValue();
9013 APInt c2 = RHS->getAPIntValue();
9014 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9015 return (c1 + c2).ult(OpSizeInBits);
9017 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9018 SDLoc DL(N);
9019 EVT ShiftVT = N1.getValueType();
9020 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9021 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9025 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9026 N0.getOperand(0).getOpcode() == ISD::SRL) {
9027 SDValue InnerShift = N0.getOperand(0);
9028 // TODO - support non-uniform vector shift amounts.
9029 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9030 uint64_t c1 = N001C->getZExtValue();
9031 uint64_t c2 = N1C->getZExtValue();
9032 EVT InnerShiftVT = InnerShift.getValueType();
9033 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9034 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9035 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
9036 // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
9037 if (c1 + OpSizeInBits == InnerShiftSize) {
9038 SDLoc DL(N);
9039 if (c1 + c2 >= InnerShiftSize)
9040 return DAG.getConstant(0, DL, VT);
9041 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9042 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9043 InnerShift.getOperand(0), NewShiftAmt);
9044 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9046 // In the more general case, we can clear the high bits after the shift:
9047 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
9048 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9049 c1 + c2 < InnerShiftSize) {
9050 SDLoc DL(N);
9051 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9052 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9053 InnerShift.getOperand(0), NewShiftAmt);
9054 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
9055 OpSizeInBits - c2),
9056 DL, InnerShiftVT);
9057 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
9058 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9063 // fold (srl (shl x, c), c) -> (and x, cst2)
9064 // TODO - (srl (shl x, c1), c2).
9065 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
9066 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
9067 SDLoc DL(N);
9068 SDValue Mask =
9069 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
9070 AddToWorklist(Mask.getNode());
9071 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
9074 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
9075 // TODO - support non-uniform vector shift amounts.
9076 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9077 // Shifting in all undef bits?
9078 EVT SmallVT = N0.getOperand(0).getValueType();
9079 unsigned BitSize = SmallVT.getScalarSizeInBits();
9080 if (N1C->getAPIntValue().uge(BitSize))
9081 return DAG.getUNDEF(VT);
9083 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9084 uint64_t ShiftAmt = N1C->getZExtValue();
9085 SDLoc DL0(N0);
9086 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
9087 N0.getOperand(0),
9088 DAG.getConstant(ShiftAmt, DL0,
9089 getShiftAmountTy(SmallVT)));
9090 AddToWorklist(SmallShift.getNode());
9091 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
9092 SDLoc DL(N);
9093 return DAG.getNode(ISD::AND, DL, VT,
9094 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
9095 DAG.getConstant(Mask, DL, VT));
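// e.g. with x:i16 any-extended to i32 and a shift of 4:
// (srl (any_extend x), 4) --> (and (any_extend (srl x, 4)), 0x0FFFFFFF),
// where the mask re-zeroes the ShiftAmt high bits of the result.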
9099 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
9100 // bit, which is unmodified by sra.
9101 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9102 if (N0.getOpcode() == ISD::SRA)
9103 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9106 // fold (srl (ctlz x), log2(BitWidth)): the result is 1 iff x == 0; simplify it using what we know about the bits of x.
9107 if (N1C && N0.getOpcode() == ISD::CTLZ &&
9108 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9109 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9111 // If any of the input bits are KnownOne, then the input couldn't be all
9112 // zeros, thus the result of the srl will always be zero.
9113 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9115 // If all of the bits input to the ctlz node are known to be zero, then
9116 // the result of the ctlz is "32" and the result of the shift is one.
9117 APInt UnknownBits = ~Known.Zero;
9118 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9120 // Otherwise, check to see if there is exactly one bit input to the ctlz.
9121 if (UnknownBits.isPowerOf2()) {
9122 // Okay, we know that only the single bit specified by UnknownBits
9123 // could be set on input to the CTLZ node. If this bit is set, the SRL
9124 // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
9125 // to an SRL/XOR pair, which is likely to simplify more.
9126 unsigned ShAmt = UnknownBits.countTrailingZeros();
9127 SDValue Op = N0.getOperand(0);
9129 if (ShAmt) {
9130 SDLoc DL(N0);
9131 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9132 DAG.getConstant(ShAmt, DL,
9133 getShiftAmountTy(Op.getValueType())));
9134 AddToWorklist(Op.getNode());
9137 SDLoc DL(N);
9138 return DAG.getNode(ISD::XOR, DL, VT,
9139 Op, DAG.getConstant(1, DL, VT));
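// Illustrative example (i32): if only bit 3 of x can be nonzero, then
// (srl (ctlz x), 5) --> (xor (srl x, 3), 1): 0 when the bit is set, 1 when x == 0.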
9143 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9144 if (N1.getOpcode() == ISD::TRUNCATE &&
9145 N1.getOperand(0).getOpcode() == ISD::AND) {
9146 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9147 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
9150 // fold operands of srl based on knowledge that the low bits are not
9151 // demanded.
9152 if (SimplifyDemandedBits(SDValue(N, 0)))
9153 return SDValue(N, 0);
9155 if (N1C && !N1C->isOpaque())
9156 if (SDValue NewSRL = visitShiftByConstant(N))
9157 return NewSRL;
9159 // Attempt to convert a srl of a load into a narrower zero-extending load.
9160 if (SDValue NarrowLoad = reduceLoadWidth(N))
9161 return NarrowLoad;
9163 // Here is a common situation. We want to optimize:
9165 // %a = ...
9166 // %b = and i32 %a, 2
9167 // %c = srl i32 %b, 1
9168 // brcond i32 %c ...
9170 // into
9172 // %a = ...
9173 // %b = and %a, 2
9174 // %c = setcc eq %b, 0
9175 // brcond %c ...
9177 // However, after the source operand of the SRL is optimized into an AND, the SRL
9178 // itself may not be optimized further. Look for it and add the BRCOND into
9179 // the worklist.
9180 if (N->hasOneUse()) {
9181 SDNode *Use = *N->use_begin();
9182 if (Use->getOpcode() == ISD::BRCOND)
9183 AddToWorklist(Use);
9184 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
9185 // Also look past the truncate.
9186 Use = *Use->use_begin();
9187 if (Use->getOpcode() == ISD::BRCOND)
9188 AddToWorklist(Use);
9192 // Try to transform this shift into a multiply-high if
9193 // it matches the appropriate pattern detected in combineShiftToMULH.
9194 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9195 return MULH;
9197 return SDValue();
9200 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9201 EVT VT = N->getValueType(0);
9202 SDValue N0 = N->getOperand(0);
9203 SDValue N1 = N->getOperand(1);
9204 SDValue N2 = N->getOperand(2);
9205 bool IsFSHL = N->getOpcode() == ISD::FSHL;
9206 unsigned BitWidth = VT.getScalarSizeInBits();
9208 // fold (fshl N0, N1, 0) -> N0
9209 // fold (fshr N0, N1, 0) -> N1
9210 if (isPowerOf2_32(BitWidth))
9211 if (DAG.MaskedValueIsZero(
9212 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9213 return IsFSHL ? N0 : N1;
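// e.g. for i32: (fshl x, y, 32) --> x and (fshr x, y, 32) --> y, since the
// shift amount is interpreted modulo the bit width.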
9215 auto IsUndefOrZero = [](SDValue V) {
9216 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9219 // TODO - support non-uniform vector shift amounts.
9220 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9221 EVT ShAmtTy = N2.getValueType();
9223 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9224 if (Cst->getAPIntValue().uge(BitWidth)) {
9225 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9226 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9227 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9230 unsigned ShAmt = Cst->getZExtValue();
9231 if (ShAmt == 0)
9232 return IsFSHL ? N0 : N1;
9234 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9235 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9236 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9237 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9238 if (IsUndefOrZero(N0))
9239 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9240 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9241 SDLoc(N), ShAmtTy));
9242 if (IsUndefOrZero(N1))
9243 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9244 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9245 SDLoc(N), ShAmtTy));
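// e.g. for i32: (fshl x, 0, 8) --> (shl x, 8) and (fshr 0, y, 8) --> (srl y, 8).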
9247 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9248 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9249 // TODO - big-endian support once we have test coverage.
9250 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9251 // TODO - permit LHS EXTLOAD if extensions are shifted out.
9252 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9253 !DAG.getDataLayout().isBigEndian()) {
9254 auto *LHS = dyn_cast<LoadSDNode>(N0);
9255 auto *RHS = dyn_cast<LoadSDNode>(N1);
9256 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9257 LHS->getAddressSpace() == RHS->getAddressSpace() &&
9258 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9259 ISD::isNON_EXTLoad(LHS)) {
9260 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9261 SDLoc DL(RHS);
9262 uint64_t PtrOff =
9263 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9264 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9265 bool Fast = false;
9266 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9267 RHS->getAddressSpace(), NewAlign,
9268 RHS->getMemOperand()->getFlags(), &Fast) &&
9269 Fast) {
9270 SDValue NewPtr = DAG.getMemBasePlusOffset(
9271 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9272 AddToWorklist(NewPtr.getNode());
9273 SDValue Load = DAG.getLoad(
9274 VT, DL, RHS->getChain(), NewPtr,
9275 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9276 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9277 // Replace the old load's chain with the new load's chain.
9278 WorklistRemover DeadNodes(*this);
9279 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9280 return Load;
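// Illustrative example (little-endian, i32): for (fshr hi, lo, 8) where lo is
// loaded from address p and hi from p + 4, the result is simply the i32 load
// from p + 1, i.e. PtrOff == ShAmt / 8 == 1.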
9287 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9288 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9289 // iff we know the shift amount is in range.
9290 // TODO: when is it worth doing SUB(BW, N2) as well?
9291 if (isPowerOf2_32(BitWidth)) {
9292 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9293 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9294 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9295 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9296 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9299 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9300 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9301 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
9302 // shift is legal as well, we might be better off avoiding non-constant (BW - N2).
9303 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9304 if (N0 == N1 && hasOperation(RotOpc, VT))
9305 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9307 // Simplify, based on bits shifted out of N0/N1.
9308 if (SimplifyDemandedBits(SDValue(N, 0)))
9309 return SDValue(N, 0);
9311 return SDValue();
9314 // Given an ABS node, detect the following pattern:
9315 // (ABS (SUB (EXTEND a), (EXTEND b))).
9316 // Generates a UABD/SABD instruction.
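// e.g. (abs (sub (zext i8 a), (zext i8 b))) --> (zext (abdu a, b)), assuming
// the target has made ABDU legal or custom for the narrow type.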
9317 static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9318 const TargetLowering &TLI) {
9319 SDValue AbsOp1 = N->getOperand(0);
9320 SDValue Op0, Op1;
9322 if (AbsOp1.getOpcode() != ISD::SUB)
9323 return SDValue();
9325 Op0 = AbsOp1.getOperand(0);
9326 Op1 = AbsOp1.getOperand(1);
9328 unsigned Opc0 = Op0.getOpcode();
9329 // Check if the operands of the sub are (zero|sign)-extended.
9330 if (Opc0 != Op1.getOpcode() ||
9331 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9332 return SDValue();
9334 EVT VT1 = Op0.getOperand(0).getValueType();
9335 EVT VT2 = Op1.getOperand(0).getValueType();
9336 // Check if the operands are of same type and valid size.
9337 unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9338 if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9339 return SDValue();
9341 Op0 = Op0.getOperand(0);
9342 Op1 = Op1.getOperand(0);
9343 SDValue ABD =
9344 DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9345 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9348 SDValue DAGCombiner::visitABS(SDNode *N) {
9349 SDValue N0 = N->getOperand(0);
9350 EVT VT = N->getValueType(0);
9352 // fold (abs c1) -> c2
9353 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9354 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9355 // fold (abs (abs x)) -> (abs x)
9356 if (N0.getOpcode() == ISD::ABS)
9357 return N0;
9358 // fold (abs x) -> x iff not-negative
9359 if (DAG.SignBitIsZero(N0))
9360 return N0;
9362 if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9363 return ABD;
9365 return SDValue();
9368 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9369 SDValue N0 = N->getOperand(0);
9370 EVT VT = N->getValueType(0);
9372 // fold (bswap c1) -> c2
9373 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9374 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9375 // fold (bswap (bswap x)) -> x
9376 if (N0.getOpcode() == ISD::BSWAP)
9377 return N0->getOperand(0);
9378 return SDValue();
9381 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9382 SDValue N0 = N->getOperand(0);
9383 EVT VT = N->getValueType(0);
9385 // fold (bitreverse c1) -> c2
9386 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9387 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9388 // fold (bitreverse (bitreverse x)) -> x
9389 if (N0.getOpcode() == ISD::BITREVERSE)
9390 return N0.getOperand(0);
9391 return SDValue();
9394 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9395 SDValue N0 = N->getOperand(0);
9396 EVT VT = N->getValueType(0);
9398 // fold (ctlz c1) -> c2
9399 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9400 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9402 // If the value is known never to be zero, switch to the undef version.
9403 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9404 if (DAG.isKnownNeverZero(N0))
9405 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9408 return SDValue();
9411 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9412 SDValue N0 = N->getOperand(0);
9413 EVT VT = N->getValueType(0);
9415 // fold (ctlz_zero_undef c1) -> c2
9416 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9417 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9418 return SDValue();
9421 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9422 SDValue N0 = N->getOperand(0);
9423 EVT VT = N->getValueType(0);
9425 // fold (cttz c1) -> c2
9426 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9427 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9429 // If the value is known never to be zero, switch to the undef version.
9430 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9431 if (DAG.isKnownNeverZero(N0))
9432 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9435 return SDValue();
9438 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9439 SDValue N0 = N->getOperand(0);
9440 EVT VT = N->getValueType(0);
9442 // fold (cttz_zero_undef c1) -> c2
9443 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9444 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9445 return SDValue();
9448 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9449 SDValue N0 = N->getOperand(0);
9450 EVT VT = N->getValueType(0);
9452 // fold (ctpop c1) -> c2
9453 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9454 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9455 return SDValue();
9458 // FIXME: This should be checking for no signed zeros on individual operands, as
9459 // well as no NaNs.
9460 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9461 SDValue RHS,
9462 const TargetLowering &TLI) {
9463 const TargetOptions &Options = DAG.getTarget().Options;
9464 EVT VT = LHS.getValueType();
9466 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9467 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9468 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9471 /// Generate Min/Max node
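/// e.g. (select (setolt x, y), x, y) --> (fminnum_ieee x, y) or (fminnum x, y),
/// provided the caller has already proven the operands are never NaN.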
9472 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9473 SDValue RHS, SDValue True, SDValue False,
9474 ISD::CondCode CC, const TargetLowering &TLI,
9475 SelectionDAG &DAG) {
9476 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9477 return SDValue();
9479 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9480 switch (CC) {
9481 case ISD::SETOLT:
9482 case ISD::SETOLE:
9483 case ISD::SETLT:
9484 case ISD::SETLE:
9485 case ISD::SETULT:
9486 case ISD::SETULE: {
9487 // Since the operands are already known never to be NaN here, either fminnum
9488 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
9489 // expanded in terms of it.
9490 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9491 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9492 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9494 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9495 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9496 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9497 return SDValue();
9499 case ISD::SETOGT:
9500 case ISD::SETOGE:
9501 case ISD::SETGT:
9502 case ISD::SETGE:
9503 case ISD::SETUGT:
9504 case ISD::SETUGE: {
9505 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9506 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9507 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9509 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9510 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9511 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9512 return SDValue();
9514 default:
9515 return SDValue();
9519 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9520 /// the condition operand sign-bit across the value width and use it as a mask.
9521 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9522 SDValue Cond = N->getOperand(0);
9523 SDValue C1 = N->getOperand(1);
9524 SDValue C2 = N->getOperand(2);
9525 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9526 return SDValue();
9528 EVT VT = N->getValueType(0);
9529 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9530 VT != Cond.getOperand(0).getValueType())
9531 return SDValue();
9533 // The inverted-condition + commuted-select variants of these patterns are
9534 // canonicalized to these forms in IR.
9535 SDValue X = Cond.getOperand(0);
9536 SDValue CondC = Cond.getOperand(1);
9537 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9538 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9539 isAllOnesOrAllOnesSplat(C2)) {
9540 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9541 SDLoc DL(N);
9542 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9543 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9544 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9546 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9547 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9548 SDLoc DL(N);
9549 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9550 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9551 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9553 return SDValue();
9556 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9557 SDValue Cond = N->getOperand(0);
9558 SDValue N1 = N->getOperand(1);
9559 SDValue N2 = N->getOperand(2);
9560 EVT VT = N->getValueType(0);
9561 EVT CondVT = Cond.getValueType();
9562 SDLoc DL(N);
9564 if (!VT.isInteger())
9565 return SDValue();
9567 auto *C1 = dyn_cast<ConstantSDNode>(N1);
9568 auto *C2 = dyn_cast<ConstantSDNode>(N2);
9569 if (!C1 || !C2)
9570 return SDValue();
9572 // Only do this before legalization to avoid conflicting with target-specific
9573 // transforms in the other direction (create a select from a zext/sext). There
9574 // is also a target-independent combine here in DAGCombiner in the other
9575 // direction for (select Cond, -1, 0) when the condition is not i1.
9576 if (CondVT == MVT::i1 && !LegalOperations) {
9577 if (C1->isZero() && C2->isOne()) {
9578 // select Cond, 0, 1 --> zext (!Cond)
9579 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9580 if (VT != MVT::i1)
9581 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9582 return NotCond;
9584 if (C1->isZero() && C2->isAllOnes()) {
9585 // select Cond, 0, -1 --> sext (!Cond)
9586 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9587 if (VT != MVT::i1)
9588 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9589 return NotCond;
9591 if (C1->isOne() && C2->isZero()) {
9592 // select Cond, 1, 0 --> zext (Cond)
9593 if (VT != MVT::i1)
9594 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9595 return Cond;
9597 if (C1->isAllOnes() && C2->isZero()) {
9598 // select Cond, -1, 0 --> sext (Cond)
9599 if (VT != MVT::i1)
9600 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9601 return Cond;
9604 // Use a target hook because some targets may prefer to transform in the
9605 // other direction.
9606 if (TLI.convertSelectOfConstantsToMath(VT)) {
9607 // For any constants that differ by 1, we can transform the select into an
9608 // extend and add.
9609 const APInt &C1Val = C1->getAPIntValue();
9610 const APInt &C2Val = C2->getAPIntValue();
9611 if (C1Val - 1 == C2Val) {
9612 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9613 if (VT != MVT::i1)
9614 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9615 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9617 if (C1Val + 1 == C2Val) {
9618 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9619 if (VT != MVT::i1)
9620 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9621 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9624 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9625 if (C1Val.isPowerOf2() && C2Val.isZero()) {
9626 if (VT != MVT::i1)
9627 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9628 SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9629 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9632 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9633 return V;
9636 return SDValue();
9639 // fold (select Cond, 0, 1) -> (xor Cond, 1)
9640 // We can't do this reliably if integer-based booleans have different contents
9641 // from floating-point-based booleans. This is because we can't tell whether we
9642 // have an integer-based boolean or a floating-point-based boolean unless we
9643 // can find the SETCC that produced it and inspect its operands. This is
9644 // fairly easy if C is the SETCC node, but it can potentially be
9645 // undiscoverable (or not reasonably discoverable). For example, it could be
9646 // in another basic block or it could require searching a complicated
9647 // expression.
9648 if (CondVT.isInteger() &&
9649 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9650 TargetLowering::ZeroOrOneBooleanContent &&
9651 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9652 TargetLowering::ZeroOrOneBooleanContent &&
9653 C1->isZero() && C2->isOne()) {
9654 SDValue NotCond =
9655 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9656 if (VT.bitsEq(CondVT))
9657 return NotCond;
9658 return DAG.getZExtOrTrunc(NotCond, DL, VT);
9661 return SDValue();
9664 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9665 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9666 "Expected a (v)select");
9667 SDValue Cond = N->getOperand(0);
9668 SDValue T = N->getOperand(1), F = N->getOperand(2);
9669 EVT VT = N->getValueType(0);
9670 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9671 return SDValue();
9673 // select Cond, Cond, F --> or Cond, F
9674 // select Cond, 1, F --> or Cond, F
9675 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9676 return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9678 // select Cond, T, Cond --> and Cond, T
9679 // select Cond, T, 0 --> and Cond, T
9680 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9681 return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9683 // select Cond, T, 1 --> or (not Cond), T
9684 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9685 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9686 return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9689 // select Cond, 0, F --> and (not Cond), F
9690 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9691 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9692 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9695 return SDValue();
9698 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
9699 SDValue N0 = N->getOperand(0);
9700 SDValue N1 = N->getOperand(1);
9701 SDValue N2 = N->getOperand(2);
9702 EVT VT = N->getValueType(0);
9703 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
9704 return SDValue();
9706 SDValue Cond0 = N0.getOperand(0);
9707 SDValue Cond1 = N0.getOperand(1);
9708 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9709 if (VT != Cond0.getValueType())
9710 return SDValue();
9712 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
9713 // compare is inverted from that pattern ("Cond0 s> -1").
9714 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
9715 ; // This is the pattern we are looking for.
9716 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
9717 std::swap(N1, N2);
9718 else
9719 return SDValue();
9721 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
9722 if (isNullOrNullSplat(N2)) {
9723 SDLoc DL(N);
9724 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9725 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9726 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
9729 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
9730 if (isAllOnesOrAllOnesSplat(N1)) {
9731 SDLoc DL(N);
9732 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9733 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9734 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
9737 // If we have to invert the sign bit mask, only do that transform if the
9738 // target has a bitwise 'and not' instruction (the invert is free).
9739 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
9740 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9741 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
9742 SDLoc DL(N);
9743 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9744 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9745 SDValue Not = DAG.getNOT(DL, Sra, VT);
9746 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
9749 // TODO: There's another pattern in this family, but it may require
9750 // implementing hasOrNot() to check for profitability:
9751 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
9753 return SDValue();
9756 SDValue DAGCombiner::visitSELECT(SDNode *N) {
9757 SDValue N0 = N->getOperand(0);
9758 SDValue N1 = N->getOperand(1);
9759 SDValue N2 = N->getOperand(2);
9760 EVT VT = N->getValueType(0);
9761 EVT VT0 = N0.getValueType();
9762 SDLoc DL(N);
9763 SDNodeFlags Flags = N->getFlags();
9765 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9766 return V;
9768 if (SDValue V = foldSelectOfConstants(N))
9769 return V;
9771 if (SDValue V = foldBoolSelectToLogic(N, DAG))
9772 return V;
9774 // If we can fold this based on the true/false value, do so.
9775 if (SimplifySelectOps(N, N1, N2))
9776 return SDValue(N, 0); // Don't revisit N.
9778 if (VT0 == MVT::i1) {
9779 // The code in this block deals with the following 2 equivalences:
9780 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9781 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9782 // The target can specify its preferred form with the
9783 // shouldNormalizeToSelectSequence() callback. However, we always transform
9784 // to the right if we find that the inner select already exists in the DAG,
9785 // and we always transform to the left side if we know that we can further
9786 // optimize the combination of the conditions.
9787 bool normalizeToSequence =
9788 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9789 // select (and Cond0, Cond1), X, Y
9790 // -> select Cond0, (select Cond1, X, Y), Y
9791 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9792 SDValue Cond0 = N0->getOperand(0);
9793 SDValue Cond1 = N0->getOperand(1);
9794 SDValue InnerSelect =
9795 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9796 if (normalizeToSequence || !InnerSelect.use_empty())
9797 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9798 InnerSelect, N2, Flags);
9799 // Cleanup on failure.
9800 if (InnerSelect.use_empty())
9801 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9803 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9804 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9805 SDValue Cond0 = N0->getOperand(0);
9806 SDValue Cond1 = N0->getOperand(1);
9807 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9808 Cond1, N1, N2, Flags);
9809 if (normalizeToSequence || !InnerSelect.use_empty())
9810 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9811 InnerSelect, Flags);
9812 // Cleanup on failure.
9813 if (InnerSelect.use_empty())
9814 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9817 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9818 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9819 SDValue N1_0 = N1->getOperand(0);
9820 SDValue N1_1 = N1->getOperand(1);
9821 SDValue N1_2 = N1->getOperand(2);
9822 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9823 // Create the actual and node if we can generate good code for it.
9824 if (!normalizeToSequence) {
9825 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9826 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9827 N2, Flags);
9829 // Otherwise see if we can optimize the "and" to a better pattern.
9830 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9831 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9832 N2, Flags);
9836 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9837 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9838 SDValue N2_0 = N2->getOperand(0);
9839 SDValue N2_1 = N2->getOperand(1);
9840 SDValue N2_2 = N2->getOperand(2);
9841 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9842 // Create the actual or node if we can generate good code for it.
9843 if (!normalizeToSequence) {
9844 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9845 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9846 N2_2, Flags);
9848 // Otherwise see if we can optimize to a better pattern.
9849 if (SDValue Combined = visitORLike(N0, N2_0, N))
9850 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9851 N2_2, Flags);
9856 // select (not Cond), N1, N2 -> select Cond, N2, N1
9857 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9858 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9859 SelectOp->setFlags(Flags);
9860 return SelectOp;
9863 // Fold selects based on a setcc into other things, such as min/max/abs.
9864 if (N0.getOpcode() == ISD::SETCC) {
9865 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9866 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9868 // select (fcmp lt x, y), x, y -> fminnum x, y
9869 // select (fcmp gt x, y), x, y -> fmaxnum x, y
9871 // This is OK if we don't care what happens if either operand is a NaN.
9872 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9873 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9874 CC, TLI, DAG))
9875 return FMinMax;
9877 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9878 // This is conservatively limited to pre-legal-operations to give targets
9879 // a chance to reverse the transform if they want to do that. Also, it is
9880 // unlikely that the pattern would be formed late, so it's probably not
9881 // worth going through the other checks.
9882 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9883 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9884 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9885 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9886 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9887 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9888 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9889 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9891 // The IR equivalent of this transform would have this form:
9892 // %a = add %x, C
9893 // %c = icmp ugt %x, ~C
9894 // %r = select %c, -1, %a
9895 // =>
9896 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9897 // %u0 = extractvalue %u, 0
9898 // %u1 = extractvalue %u, 1
9899 // %r = select %u1, -1, %u0
9900 SDVTList VTs = DAG.getVTList(VT, VT0);
9901 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9902 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9906 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9907 (!LegalOperations &&
9908 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9909 // Any flags available in a select/setcc fold will be on the setcc as they
9910 // migrated from the fcmp.
9911 Flags = N0.getNode()->getFlags();
9912 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9913 N2, N0.getOperand(2));
9914 SelectNode->setFlags(Flags);
9915 return SelectNode;
9918 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
9919 return NewSel;
9922 if (!VT.isVector())
9923 if (SDValue BinOp = foldSelectOfBinops(N))
9924 return BinOp;
9926 return SDValue();
9929 // This function assumes all the vselect's arguments are CONCAT_VECTOR
9930 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
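// e.g. (vselect <0,0,-1,-1>, (concat A, B), (concat C, D)) --> (concat C, B).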
9931 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9932 SDLoc DL(N);
9933 SDValue Cond = N->getOperand(0);
9934 SDValue LHS = N->getOperand(1);
9935 SDValue RHS = N->getOperand(2);
9936 EVT VT = N->getValueType(0);
9937 int NumElems = VT.getVectorNumElements();
9938 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9939 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9940 Cond.getOpcode() == ISD::BUILD_VECTOR);
9942 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
9943 // about the binary (two-operand) case here.
9944 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9945 return SDValue();
9947 // We're sure we have an even number of elements due to the
9948 // concat_vectors we have as arguments to vselect.
9949 // Skip BV elements until we find one that's not an UNDEF.
9950 // After we find a non-UNDEF element, keep looping until we get to half the
9951 // length of the BV and see if all the non-undef nodes are the same.
9952 ConstantSDNode *BottomHalf = nullptr;
9953 for (int i = 0; i < NumElems / 2; ++i) {
9954 if (Cond->getOperand(i)->isUndef())
9955 continue;
9957 if (BottomHalf == nullptr)
9958 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9959 else if (Cond->getOperand(i).getNode() != BottomHalf)
9960 return SDValue();
9963 // Do the same for the second half of the BuildVector
9964 ConstantSDNode *TopHalf = nullptr;
9965 for (int i = NumElems / 2; i < NumElems; ++i) {
9966 if (Cond->getOperand(i)->isUndef())
9967 continue;
9969 if (TopHalf == nullptr)
9970 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9971 else if (Cond->getOperand(i).getNode() != TopHalf)
9972 return SDValue();
9975 assert(TopHalf && BottomHalf &&
9976 "One half of the selector was all UNDEFs and the other was all the "
9977 "same value. This should have been addressed before this function.");
9978 return DAG.getNode(
9979 ISD::CONCAT_VECTORS, DL, VT,
9980 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
9981 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
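// Recognize a gather/scatter index of the form (add (splat X), Y) when the
// current base pointer is null: use X as the uniform scalar base and Y as the
// remaining per-lane index.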
9984 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9985 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9986 return false;
9988 // For now we check only the LHS of the add.
9989 SDValue LHS = Index.getOperand(0);
9990 SDValue SplatVal = DAG.getSplatValue(LHS);
9991 if (!SplatVal)
9992 return false;
9994 BasePtr = SplatVal;
9995 Index = Index.getOperand(1);
9996 return true;
9999 // Fold sext/zext of index into index type.
10000 bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
10001 bool Scaled, SelectionDAG &DAG) {
10002 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10004 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10005 SDValue Op = Index.getOperand(0);
10006 MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
10007 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10008 Index = Op;
10009 return true;
10013 if (Index.getOpcode() == ISD::SIGN_EXTEND) {
10014 SDValue Op = Index.getOperand(0);
10015 MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
10016 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10017 Index = Op;
10018 return true;
10022 return false;
10025 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10026 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
10027 SDValue Mask = MSC->getMask();
10028 SDValue Chain = MSC->getChain();
10029 SDValue Index = MSC->getIndex();
10030 SDValue Scale = MSC->getScale();
10031 SDValue StoreVal = MSC->getValue();
10032 SDValue BasePtr = MSC->getBasePtr();
10033 SDLoc DL(N);
10035 // Zap scatters with a zero mask.
10036 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10037 return Chain;
10039 if (refineUniformBase(BasePtr, Index, DAG)) {
10040 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10041 return DAG.getMaskedScatter(
10042 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10043 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10046 if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
10047 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10048 return DAG.getMaskedScatter(
10049 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10050 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10053 return SDValue();
10056 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10057 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
10058 SDValue Mask = MST->getMask();
10059 SDValue Chain = MST->getChain();
10060 SDValue Value = MST->getValue();
10061 SDValue Ptr = MST->getBasePtr();
10062 SDLoc DL(N);
10064 // Zap masked stores with a zero mask.
10065 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10066 return Chain;
10068 // If this is a masked store with an all-ones mask, we can use an unmasked store.
10069 // FIXME: Can we do this for indexed, compressing, or truncating stores?
10070 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10071 !MST->isCompressingStore() && !MST->isTruncatingStore())
10072 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10073 MST->getBasePtr(), MST->getPointerInfo(),
10074 MST->getOriginalAlign(), MachineMemOperand::MOStore,
10075 MST->getAAInfo());
10077 // Try transforming N to an indexed store.
10078 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10079 return SDValue(N, 0);
10081 if (MST->isTruncatingStore() && MST->isUnindexed() &&
10082 Value.getValueType().isInteger() &&
10083 (!isa<ConstantSDNode>(Value) ||
10084 !cast<ConstantSDNode>(Value)->isOpaque())) {
10085 APInt TruncDemandedBits =
10086 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10087 MST->getMemoryVT().getScalarSizeInBits());
10089 // See if we can simplify the operation with
10090 // SimplifyDemandedBits, which only works if the value has a single use.
10091 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
10092 // Re-visit the store if anything changed and the store hasn't been merged
10093 // with another node (N is deleted). SimplifyDemandedBits will add Value's
10094 // node back to the worklist if necessary, but we also need to re-visit
10095 // the Store node itself.
10096 if (N->getOpcode() != ISD::DELETED_NODE)
10097 AddToWorklist(N);
10098 return SDValue(N, 0);
10102 // If this is a TRUNC followed by a masked store, fold this into a masked
10103 // truncating store. We can do this even if this is already a masked
10104 // truncstore.
10105 if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
10106 MST->isUnindexed() &&
10107 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10108 MST->getMemoryVT(), LegalOperations)) {
10109 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10110 Value.getOperand(0).getValueType());
10111 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10112 MST->getOffset(), Mask, MST->getMemoryVT(),
10113 MST->getMemOperand(), MST->getAddressingMode(),
10114 /*IsTruncating=*/true);
10117 return SDValue();
10120 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10121 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
10122 SDValue Mask = MGT->getMask();
10123 SDValue Chain = MGT->getChain();
10124 SDValue Index = MGT->getIndex();
10125 SDValue Scale = MGT->getScale();
10126 SDValue PassThru = MGT->getPassThru();
10127 SDValue BasePtr = MGT->getBasePtr();
10128 SDLoc DL(N);
10130 // Zap gathers with a zero mask.
10131 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10132 return CombineTo(N, PassThru, MGT->getChain());
10134 if (refineUniformBase(BasePtr, Index, DAG)) {
10135 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10136 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10137 MGT->getMemoryVT(), DL, Ops,
10138 MGT->getMemOperand(), MGT->getIndexType(),
10139 MGT->getExtensionType());
10142 if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
10143 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10144 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10145 MGT->getMemoryVT(), DL, Ops,
10146 MGT->getMemOperand(), MGT->getIndexType(),
10147 MGT->getExtensionType());
10150 return SDValue();
10153 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10154 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
10155 SDValue Mask = MLD->getMask();
10156 SDLoc DL(N);
10158 // Zap masked loads with a zero mask.
10159 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10160 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10162 // If this is a masked load with an all-ones mask, we can use an unmasked load.
10163 // FIXME: Can we do this for indexed, expanding, or extending loads?
10164 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10165 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10166 SDValue NewLd = DAG.getLoad(
10167 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10168 MLD->getPointerInfo(), MLD->getOriginalAlign(),
10169 MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
10170 return CombineTo(N, NewLd, NewLd.getValue(1));
10173 // Try transforming N to an indexed load.
10174 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10175 return SDValue(N, 0);
10177 return SDValue();
10180 /// A vector select of 2 constant vectors can be simplified to math/logic to
10181 /// avoid a variable select instruction and possibly avoid constant loads.
10182 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10183 SDValue Cond = N->getOperand(0);
10184 SDValue N1 = N->getOperand(1);
10185 SDValue N2 = N->getOperand(2);
10186 EVT VT = N->getValueType(0);
10187 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10188 !TLI.convertSelectOfConstantsToMath(VT) ||
10189 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
10190 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
10191 return SDValue();
10193 // Check if we can use the condition value to increment/decrement a single
10194 // constant value. This simplifies a select to an add and removes a constant
10195 // load/materialization from the general case.
10196 bool AllAddOne = true;
10197 bool AllSubOne = true;
10198 unsigned Elts = VT.getVectorNumElements();
10199 for (unsigned i = 0; i != Elts; ++i) {
10200 SDValue N1Elt = N1.getOperand(i);
10201 SDValue N2Elt = N2.getOperand(i);
10202 if (N1Elt.isUndef() || N2Elt.isUndef())
10203 continue;
10204 if (N1Elt.getValueType() != N2Elt.getValueType())
10205 continue;
10207 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10208 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10209 if (C1 != C2 + 1)
10210 AllAddOne = false;
10211 if (C1 != C2 - 1)
10212 AllSubOne = false;
10215 // Further simplifications for the extra-special cases where the constants are
10216 // all 0 or all -1 should be implemented as folds of these patterns.
10217 SDLoc DL(N);
10218 if (AllAddOne || AllSubOne) {
10219 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10220 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
10221 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10222 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10223 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
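// e.g. (vselect Cond, <5, 9>, <4, 8>) --> (add (zext Cond), <4, 8>), since each
// true-arm constant is exactly one greater than the matching false-arm constant.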
10226 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
10227 APInt Pow2C;
10228 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10229 isNullOrNullSplat(N2)) {
10230 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
10231 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10232 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10235 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10236 return V;
10238 // The general case for select-of-constants:
10239 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10240 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10241 // leave that to a machine-specific pass.
10242 return SDValue();
10245 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10246 SDValue N0 = N->getOperand(0);
10247 SDValue N1 = N->getOperand(1);
10248 SDValue N2 = N->getOperand(2);
10249 EVT VT = N->getValueType(0);
10250 SDLoc DL(N);
10252 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10253 return V;
10255 if (SDValue V = foldBoolSelectToLogic(N, DAG))
10256 return V;
10258 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10259 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10260 return DAG.getSelect(DL, VT, F, N2, N1);
10262 // Canonicalize integer abs.
10263 // vselect (setg[te] X, 0), X, -X ->
10264 // vselect (setgt X, -1), X, -X ->
10265 // vselect (setl[te] X, 0), -X, X ->
10266 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10267 if (N0.getOpcode() == ISD::SETCC) {
10268 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10269 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10270 bool isAbs = false;
10271 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
10273 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10274 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10275 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
10276 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
10277 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10278 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
10279 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
10281 if (isAbs) {
10282 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
10283 return DAG.getNode(ISD::ABS, DL, VT, LHS);
10285 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10286 DAG.getConstant(VT.getScalarSizeInBits() - 1,
10287 DL, getShiftAmountTy(VT)));
10288 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10289 AddToWorklist(Shift.getNode());
10290 AddToWorklist(Add.getNode());
10291 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10294 // vselect x, y (fcmp lt x, y) -> fminnum x, y
10295 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
10297 // This is OK if we don't care about what happens if either operand is a
10298 // NaN.
10300 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10301 if (SDValue FMinMax =
10302 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10303 return FMinMax;
10306 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10307 return S;
10309 // If this select has a condition (setcc) with narrower operands than the
10310 // select, try to widen the compare to match the select width.
10311 // TODO: This should be extended to handle any constant.
10312 // TODO: This could be extended to handle non-loading patterns, but that
10313 // requires thorough testing to avoid regressions.
10314 if (isNullOrNullSplat(RHS)) {
10315 EVT NarrowVT = LHS.getValueType();
10316 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10317 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10318 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10319 unsigned WideWidth = WideVT.getScalarSizeInBits();
10320 bool IsSigned = isSignedIntSetCC(CC);
10321 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10322 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10323 SetCCWidth != 1 && SetCCWidth < WideWidth &&
10324 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10325 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10326 // Both compare operands can be widened for free. The LHS can use an
10327 // extended load, and the RHS is a constant:
10328 // vselect (ext (setcc load(X), C)), N1, N2 -->
10329 // vselect (setcc extload(X), C'), N1, N2
10330 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10331 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10332 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10333 EVT WideSetCCVT = getSetCCResultType(WideVT);
10334 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10335 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10339 // Match VSELECTs into add with unsigned saturation.
10340 if (hasOperation(ISD::UADDSAT, VT)) {
10341 // Check if one of the arms of the VSELECT is a vector with all bits set.
10342 // If it's on the left side invert the predicate to simplify logic below.
10343 SDValue Other;
10344 ISD::CondCode SatCC = CC;
10345 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10346 Other = N2;
10347 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10348 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10349 Other = N1;
10352 if (Other && Other.getOpcode() == ISD::ADD) {
10353 SDValue CondLHS = LHS, CondRHS = RHS;
10354 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10356 // Canonicalize condition operands.
10357 if (SatCC == ISD::SETUGE) {
10358 std::swap(CondLHS, CondRHS);
10359 SatCC = ISD::SETULE;
10362 // We can test against either of the addition operands.
10363 // x <= x+y ? x+y : ~0 --> uaddsat x, y
10364 // x+y >= x ? x+y : ~0 --> uaddsat x, y
10365 if (SatCC == ISD::SETULE && Other == CondRHS &&
10366 (OpLHS == CondLHS || OpRHS == CondLHS))
10367 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10369 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10370 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10371 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10372 CondLHS == OpLHS) {
10373 // If the RHS is a constant we have to reverse the const
10374 // canonicalization.
10375 // x >= ~C ? x+C : ~0 --> uaddsat x, C
10376 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10377 return Cond->getAPIntValue() == ~Op->getAPIntValue();
10379 if (SatCC == ISD::SETULE &&
10380 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10381 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10386 // Match VSELECTs into sub with unsigned saturation.
10387 if (hasOperation(ISD::USUBSAT, VT)) {
10388 // Check if one of the arms of the VSELECT is a zero vector. If it's on
10389 // the left side invert the predicate to simplify logic below.
10390 SDValue Other;
10391 ISD::CondCode SatCC = CC;
10392 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10393 Other = N2;
10394 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10395 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10396 Other = N1;
10399 if (Other && Other.getNumOperands() == 2) {
10400 SDValue CondRHS = RHS;
10401 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10403 if (Other.getOpcode() == ISD::SUB &&
10404 LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10405 OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10406 // Look for a general sub with unsigned saturation first.
10407 // zext(x) >= y ? x - trunc(y) : 0
10408 // --> usubsat(x,trunc(umin(y,SatLimit)))
10409 // zext(x) > y ? x - trunc(y) : 0
10410 // --> usubsat(x,trunc(umin(y,SatLimit)))
10411 if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10412 return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10413 DL);
10416 if (OpLHS == LHS) {
10417 // Look for a general sub with unsigned saturation first.
10418 // x >= y ? x-y : 0 --> usubsat x, y
10419 // x > y ? x-y : 0 --> usubsat x, y
10420 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10421 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10422 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10424 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10425 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10426 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10427 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10428 // If the RHS is a constant we have to reverse the const
10429 // canonicalization.
10430 // x > C-1 ? x+-C : 0 --> usubsat x, C
10431 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10432 return (!Op && !Cond) ||
10433 (Op && Cond &&
10434 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10436 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10437 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10438 /*AllowUndefs*/ true)) {
10439 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10440 DAG.getConstant(0, DL, VT), OpRHS);
10441 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10444 // Another special case: If C was a sign bit, the sub has been
10445 // canonicalized into a xor.
10446 // FIXME: Would it be better to use computeKnownBits to determine
10447 // whether it's safe to decanonicalize the xor?
10448 // x s< 0 ? x^C : 0 --> usubsat x, C
10449 APInt SplatValue;
10450 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10451 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10452 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10453 SplatValue.isSignMask()) {
10454 // Note that we have to rebuild the RHS constant here to
10455 // ensure we don't rely on particular values of undef lanes.
10456 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10457 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10466 if (SimplifySelectOps(N, N1, N2))
10467 return SDValue(N, 0); // Don't revisit N.
10469 // Fold (vselect all_ones, N1, N2) -> N1
10470 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10471 return N1;
10472 // Fold (vselect all_zeros, N1, N2) -> N2
10473 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10474 return N2;
10476 // The ConvertSelectToConcatVector function assumes both the above
10477 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
10478 // and addressed.
10479 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10480 N2.getOpcode() == ISD::CONCAT_VECTORS &&
10481 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10482 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10483 return CV;
10486 if (SDValue V = foldVSelectOfConstants(N))
10487 return V;
10489 if (hasOperation(ISD::SRA, VT))
10490 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
10491 return V;
10493 return SDValue();
10496 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10497 SDValue N0 = N->getOperand(0);
10498 SDValue N1 = N->getOperand(1);
10499 SDValue N2 = N->getOperand(2);
10500 SDValue N3 = N->getOperand(3);
10501 SDValue N4 = N->getOperand(4);
10502 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10504 // fold select_cc lhs, rhs, x, x, cc -> x
10505 if (N2 == N3)
10506 return N2;
10508 // Determine if the condition we're dealing with is constant
10509 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10510 CC, SDLoc(N), false)) {
10511 AddToWorklist(SCC.getNode());
10513 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10514 if (!SCCC->isZero())
10515 return N2; // cond always true -> true val
10516 else
10517 return N3; // cond always false -> false val
10518 } else if (SCC->isUndef()) {
10519 // When the condition is UNDEF, just return the first operand. This is
10520 // coherent with the DAG creation; no setcc node is created in this case.
10521 return N2;
10522 } else if (SCC.getOpcode() == ISD::SETCC) {
10523 // Fold to a simpler select_cc
10524 SDValue SelectOp = DAG.getNode(
10525 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10526 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10527 SelectOp->setFlags(SCC->getFlags());
10528 return SelectOp;
10532 // If we can fold this based on the true/false value, do so.
10533 if (SimplifySelectOps(N, N2, N3))
10534 return SDValue(N, 0); // Don't revisit N.
10536 // fold select_cc into other things, such as min/max/abs
10537 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10540 SDValue DAGCombiner::visitSETCC(SDNode *N) {
10541 // setcc is very commonly used as an argument to brcond. This pattern
10542 // also lends itself to numerous combines and, as a result, it is desirable
10543 // to keep the argument to a brcond as a setcc as much as possible.
10544 bool PreferSetCC =
10545 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10547 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10548 EVT VT = N->getValueType(0);
10550 // SETCC(FREEZE(X), CONST, Cond)
10551 // =>
10552 // FREEZE(SETCC(X, CONST, Cond))
10553 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10554 // isn't equivalent to true or false.
10555 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10556 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10558 // This transformation is beneficial because visitBRCOND can fold
10559 // BRCOND(FREEZE(X)) to BRCOND(X).
10561 // Conservatively optimize integer comparisons only.
10562 if (PreferSetCC) {
10563 // Do this only when SETCC is going to be used by BRCOND.
10565 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10566 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10567 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10568 bool Updated = false;
10570 // Is 'X Cond C' always true or false?
10571 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10572 bool False = (Cond == ISD::SETULT && C->isZero()) ||
10573 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
10574 (Cond == ISD::SETUGT && C->isAllOnes()) ||
10575 (Cond == ISD::SETGT && C->isMaxSignedValue());
10576 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
10577 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
10578 (Cond == ISD::SETUGE && C->isZero()) ||
10579 (Cond == ISD::SETGE && C->isMinSignedValue());
10580 return True || False;
10583 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10584 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10585 N0 = N0->getOperand(0);
10586 Updated = true;
10589 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10590 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10591 N0C)) {
10592 N1 = N1->getOperand(0);
10593 Updated = true;
10597 if (Updated)
10598 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10601 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10602 SDLoc(N), !PreferSetCC);
10604 if (!Combined)
10605 return SDValue();
10607 // If we prefer to have a setcc, and we don't, we'll try our best to
10608 // recreate one using rebuildSetCC.
10609 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10610 SDValue NewSetCC = rebuildSetCC(Combined);
10612 // We don't have anything interesting to combine to.
10613 if (NewSetCC.getNode() == N)
10614 return SDValue();
10616 if (NewSetCC)
10617 return NewSetCC;
10620 return Combined;
10623 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10624 SDValue LHS = N->getOperand(0);
10625 SDValue RHS = N->getOperand(1);
10626 SDValue Carry = N->getOperand(2);
10627 SDValue Cond = N->getOperand(3);
10629 // If Carry is false, fold to a regular SETCC.
10630 if (isNullConstant(Carry))
10631 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10633 return SDValue();
10636 /// Check if N satisfies:
10637 /// N is used once.
10638 /// N is a Load.
10639 ///   The load is compatible with ExtOpcode: if the load has an explicit
10640 ///     zero/sign extension, ExtOpcode must perform the same kind of extension;
10641 ///     otherwise (a plain or any-extending load) any ExtOpcode is considered
10642 ///     compatible.
10643 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10644 if (!N.hasOneUse())
10645 return false;
10647 if (!isa<LoadSDNode>(N))
10648 return false;
10650 LoadSDNode *Load = cast<LoadSDNode>(N);
10651 ISD::LoadExtType LoadExt = Load->getExtensionType();
10652 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10653 return true;
10655   // Now LoadExt is either SEXTLOAD or ZEXTLOAD; ExtOpcode must have the same
10656 // extension.
10657 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10658 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10659 return false;
10661 return true;
10664 /// Fold
10665 /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10666 /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10667 /// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10668 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10669 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
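/// For example (illustrative), with a one-use select of two simple i8 loads:
///   (i32 (sext (i8 (select c, (load x), (load y)))))
/// can become
///   (i32 (select c, (sextload x), (sextload y)))
/// provided the target reports the corresponding extending load as legal.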
10670 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10671 SelectionDAG &DAG) {
10672 unsigned Opcode = N->getOpcode();
10673 SDValue N0 = N->getOperand(0);
10674 EVT VT = N->getValueType(0);
10675 SDLoc DL(N);
10677 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10678 Opcode == ISD::ANY_EXTEND) &&
10679 "Expected EXTEND dag node in input!");
10681 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10682 !N0.hasOneUse())
10683 return SDValue();
10685 SDValue Op1 = N0->getOperand(1);
10686 SDValue Op2 = N0->getOperand(2);
10687 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10688 return SDValue();
10690 auto ExtLoadOpcode = ISD::EXTLOAD;
10691 if (Opcode == ISD::SIGN_EXTEND)
10692 ExtLoadOpcode = ISD::SEXTLOAD;
10693 else if (Opcode == ISD::ZERO_EXTEND)
10694 ExtLoadOpcode = ISD::ZEXTLOAD;
10696 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10697 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10698 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10699 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10700 return SDValue();
10702 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10703 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10704 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10707 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10708 /// a build_vector of constants.
10709 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10710 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10711 /// Vector extends are not folded if operations are legal; this is to
10712 /// avoid introducing illegal build_vector dag nodes.
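/// For example (illustrative):
///   (i32 (zext (i8 Constant<42>)))     --> (i32 Constant<42>)
///   (v4i32 (sext (v4i8 build_vector))) --> (v4i32 build_vector) of the
///                                          sign-extended elements
/// The vector case is skipped after type legalization if the scalar result
/// type is not legal, to avoid creating an illegal build_vector.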
10713 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10714 SelectionDAG &DAG, bool LegalTypes) {
10715 unsigned Opcode = N->getOpcode();
10716 SDValue N0 = N->getOperand(0);
10717 EVT VT = N->getValueType(0);
10718 SDLoc DL(N);
10720 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10721 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10722 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10723 && "Expected EXTEND dag node in input!");
10725 // fold (sext c1) -> c1
10726 // fold (zext c1) -> c1
10727 // fold (aext c1) -> c1
10728 if (isa<ConstantSDNode>(N0))
10729 return DAG.getNode(Opcode, DL, VT, N0);
10731 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10732 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10733 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10734 if (N0->getOpcode() == ISD::SELECT) {
10735 SDValue Op1 = N0->getOperand(1);
10736 SDValue Op2 = N0->getOperand(2);
10737 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10738 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10739 // For any_extend, choose sign extension of the constants to allow a
10740       // possible further transform to sign_extend_inreg, i.e.:
10742 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10743 // t2: i64 = any_extend t1
10744 // -->
10745 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10746 // -->
10747 // t4: i64 = sign_extend_inreg t3
10748 unsigned FoldOpc = Opcode;
10749 if (FoldOpc == ISD::ANY_EXTEND)
10750 FoldOpc = ISD::SIGN_EXTEND;
10751 return DAG.getSelect(DL, VT, N0->getOperand(0),
10752 DAG.getNode(FoldOpc, DL, VT, Op1),
10753 DAG.getNode(FoldOpc, DL, VT, Op2));
10757   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
10758   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
10759   // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
10760 EVT SVT = VT.getScalarType();
10761 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10762 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10763 return SDValue();
10765 // We can fold this node into a build_vector.
10766 unsigned VTBits = SVT.getSizeInBits();
10767 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10768 SmallVector<SDValue, 8> Elts;
10769 unsigned NumElts = VT.getVectorNumElements();
10771   // For zero-extensions, UNDEF elements are still guaranteed to have their
10772   // upper bits set to zero.
10773 bool IsZext =
10774 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10776 for (unsigned i = 0; i != NumElts; ++i) {
10777 SDValue Op = N0.getOperand(i);
10778 if (Op.isUndef()) {
10779 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10780 continue;
10783 SDLoc DL(Op);
10784     // Get the constant value and, if needed, truncate it to the type's size.
10785 // Nodes like build_vector might have constants wider than the scalar type.
10786 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10787 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10788 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10789 else
10790 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10793 return DAG.getBuildVector(VT, DL, Elts);
10796 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
10797 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10798 // transformation. Returns true if extending the uses is possible and the
10799 // above-mentioned transformation is profitable.
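// Roughly: setcc users of the load can be extended alongside it (and are
// collected in ExtendNodes), while any other user requires a free truncate
// back to the original type for the transformation to be worthwhile.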
10800 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10801 unsigned ExtOpc,
10802 SmallVectorImpl<SDNode *> &ExtendNodes,
10803 const TargetLowering &TLI) {
10804 bool HasCopyToRegUses = false;
10805 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10806 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10807 UE = N0.getNode()->use_end();
10808 UI != UE; ++UI) {
10809 SDNode *User = *UI;
10810 if (User == N)
10811 continue;
10812 if (UI.getUse().getResNo() != N0.getResNo())
10813 continue;
10814 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10815 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10816 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10817 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10818 // Sign bits will be lost after a zext.
10819 return false;
10820 bool Add = false;
10821 for (unsigned i = 0; i != 2; ++i) {
10822 SDValue UseOp = User->getOperand(i);
10823 if (UseOp == N0)
10824 continue;
10825 if (!isa<ConstantSDNode>(UseOp))
10826 return false;
10827 Add = true;
10829 if (Add)
10830 ExtendNodes.push_back(User);
10831 continue;
10833 // If truncates aren't free and there are users we can't
10834 // extend, it isn't worthwhile.
10835 if (!isTruncFree)
10836 return false;
10837 // Remember if this value is live-out.
10838 if (User->getOpcode() == ISD::CopyToReg)
10839 HasCopyToRegUses = true;
10842 if (HasCopyToRegUses) {
10843 bool BothLiveOut = false;
10844 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10845 UI != UE; ++UI) {
10846 SDUse &Use = UI.getUse();
10847 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10848 BothLiveOut = true;
10849 break;
10852 if (BothLiveOut)
10853 // Both unextended and extended values are live out. There had better be
10854 // a good reason for the transformation.
10855 return ExtendNodes.size();
10857 return true;
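/// Rewrite the setcc nodes collected by ExtendUsesToFormExtLoad so that they
/// compare the newly created extended load (and extended copies of their other
/// operands) instead of the original narrow load.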
10860 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10861 SDValue OrigLoad, SDValue ExtLoad,
10862 ISD::NodeType ExtType) {
10863 // Extend SetCC uses if necessary.
10864 SDLoc DL(ExtLoad);
10865 for (SDNode *SetCC : SetCCs) {
10866 SmallVector<SDValue, 4> Ops;
10868 for (unsigned j = 0; j != 2; ++j) {
10869 SDValue SOp = SetCC->getOperand(j);
10870 if (SOp == OrigLoad)
10871 Ops.push_back(ExtLoad);
10872 else
10873 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10876 Ops.push_back(SetCC->getOperand(2));
10877 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10881 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10882 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10883 SDValue N0 = N->getOperand(0);
10884 EVT DstVT = N->getValueType(0);
10885 EVT SrcVT = N0.getValueType();
10887 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10888 N->getOpcode() == ISD::ZERO_EXTEND) &&
10889 "Unexpected node type (not an extend)!");
10891 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10892 // For example, on a target with legal v4i32, but illegal v8i32, turn:
10893 // (v8i32 (sext (v8i16 (load x))))
10894 // into:
10895 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10896 // (v4i32 (sextload (x + 16)))))
10897 // Where uses of the original load, i.e.:
10898 // (v8i16 (load x))
10899 // are replaced with:
10900 // (v8i16 (truncate
10901 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10902 // (v4i32 (sextload (x + 16)))))))
10904 // This combine is only applicable to illegal, but splittable, vectors.
10905 // All legal types, and illegal non-vector types, are handled elsewhere.
10906 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10908 if (N0->getOpcode() != ISD::LOAD)
10909 return SDValue();
10911 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10913 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10914 !N0.hasOneUse() || !LN0->isSimple() ||
10915 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10916 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10917 return SDValue();
10919 SmallVector<SDNode *, 4> SetCCs;
10920 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10921 return SDValue();
10923 ISD::LoadExtType ExtType =
10924 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10926 // Try to split the vector types to get down to legal types.
10927 EVT SplitSrcVT = SrcVT;
10928 EVT SplitDstVT = DstVT;
10929 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10930 SplitSrcVT.getVectorNumElements() > 1) {
10931 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10932 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10935 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10936 return SDValue();
10938 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10940 SDLoc DL(N);
10941 const unsigned NumSplits =
10942 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10943 const unsigned Stride = SplitSrcVT.getStoreSize();
10944 SmallVector<SDValue, 4> Loads;
10945 SmallVector<SDValue, 4> Chains;
10947 SDValue BasePtr = LN0->getBasePtr();
10948 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10949 const unsigned Offset = Idx * Stride;
10950 const Align Align = commonAlignment(LN0->getAlign(), Offset);
10952 SDValue SplitLoad = DAG.getExtLoad(
10953 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10954 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10955 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10957 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10959 Loads.push_back(SplitLoad.getValue(0));
10960 Chains.push_back(SplitLoad.getValue(1));
10963 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10964 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10966   // Simplify the new TokenFactor.
10967 AddToWorklist(NewChain.getNode());
10969 CombineTo(N, NewValue);
10971 // Replace uses of the original load (before extension)
10972 // with a truncate of the concatenated sextloaded vectors.
10973 SDValue Trunc =
10974 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10975 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10976 CombineTo(N0.getNode(), Trunc, NewChain);
10977 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10980 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10981 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
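// For example (illustrative), on a target where an i16->i32 zextload is legal:
//   (i32 (zext (i16 (and (i16 (srl (i16 (load x)), 2)), 0xff))))
// can become
//   (i32 (and (i32 (srl (i32 (zextload x)), 2)), 0xff))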
10982 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10983 assert(N->getOpcode() == ISD::ZERO_EXTEND);
10984 EVT VT = N->getValueType(0);
10985 EVT OrigVT = N->getOperand(0).getValueType();
10986 if (TLI.isZExtFree(OrigVT, VT))
10987 return SDValue();
10989 // and/or/xor
10990 SDValue N0 = N->getOperand(0);
10991 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10992 N0.getOpcode() == ISD::XOR) ||
10993 N0.getOperand(1).getOpcode() != ISD::Constant ||
10994 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10995 return SDValue();
10997 // shl/shr
10998 SDValue N1 = N0->getOperand(0);
10999 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11000 N1.getOperand(1).getOpcode() != ISD::Constant ||
11001 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
11002 return SDValue();
11004 // load
11005 if (!isa<LoadSDNode>(N1.getOperand(0)))
11006 return SDValue();
11007 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11008 EVT MemVT = Load->getMemoryVT();
11009 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11010 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
11011 return SDValue();
11014 // If the shift op is SHL, the logic op must be AND, otherwise the result
11015 // will be wrong.
11016 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11017 return SDValue();
11019 if (!N0.hasOneUse() || !N1.hasOneUse())
11020 return SDValue();
11022 SmallVector<SDNode*, 4> SetCCs;
11023 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11024 ISD::ZERO_EXTEND, SetCCs, TLI))
11025 return SDValue();
11027 // Actually do the transformation.
11028 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11029 Load->getChain(), Load->getBasePtr(),
11030 Load->getMemoryVT(), Load->getMemOperand());
11032 SDLoc DL1(N1);
11033 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
11034 N1.getOperand(1));
11036 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11037 SDLoc DL0(N0);
11038 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11039 DAG.getConstant(Mask, DL0, VT));
11041 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11042 CombineTo(N, And);
11043 if (SDValue(Load, 0).hasOneUse()) {
11044 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11045 } else {
11046 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11047 Load->getValueType(0), ExtLoad);
11048 CombineTo(Load, Trunc, ExtLoad.getValue(1));
11051 // N0 is dead at this point.
11052 recursivelyDeleteUnusedNodes(N0.getNode());
11054 return SDValue(N,0); // Return N so it doesn't get rechecked!
11057 /// If we're narrowing or widening the result of a vector select and the final
11058 /// size is the same size as a setcc (compare) feeding the select, then try to
11059 /// apply the cast operation to the select's operands because matching vector
11060 /// sizes for a select condition and other operands should be more efficient.
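/// For example (illustrative), where the compare operands are v4i32:
///   (v4i32 (truncate (vselect (setcc a, b), x:v4i64, y:v4i64)))
/// can become
///   (vselect (setcc a, b), (v4i32 (truncate x)), (v4i32 (truncate y)))
/// so the select condition and its other operands have matching vector sizes.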
11061 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11062 unsigned CastOpcode = Cast->getOpcode();
11063 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
11064 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
11065 CastOpcode == ISD::FP_ROUND) &&
11066 "Unexpected opcode for vector select narrowing/widening");
11068 // We only do this transform before legal ops because the pattern may be
11069 // obfuscated by target-specific operations after legalization. Do not create
11070 // an illegal select op, however, because that may be difficult to lower.
11071 EVT VT = Cast->getValueType(0);
11072 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
11073 return SDValue();
11075 SDValue VSel = Cast->getOperand(0);
11076 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11077 VSel.getOperand(0).getOpcode() != ISD::SETCC)
11078 return SDValue();
11080 // Does the setcc have the same vector size as the casted select?
11081 SDValue SetCC = VSel.getOperand(0);
11082 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11083 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11084 return SDValue();
11086 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11087 SDValue A = VSel.getOperand(1);
11088 SDValue B = VSel.getOperand(2);
11089 SDValue CastA, CastB;
11090 SDLoc DL(Cast);
11091 if (CastOpcode == ISD::FP_ROUND) {
11092 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11093 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11094 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11095 } else {
11096 CastA = DAG.getNode(CastOpcode, DL, VT, A);
11097 CastB = DAG.getNode(CastOpcode, DL, VT, B);
11099 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11102 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11103 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11104 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
11105 const TargetLowering &TLI, EVT VT,
11106 bool LegalOperations, SDNode *N,
11107 SDValue N0, ISD::LoadExtType ExtLoadType) {
11108 SDNode *N0Node = N0.getNode();
11109 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
11110 : ISD::isZEXTLoad(N0Node);
11111 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11112 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
11113 return SDValue();
11115 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11116 EVT MemVT = LN0->getMemoryVT();
11117 if ((LegalOperations || !LN0->isSimple() ||
11118 VT.isVector()) &&
11119 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
11120 return SDValue();
11122 SDValue ExtLoad =
11123 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11124 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11125 Combiner.CombineTo(N, ExtLoad);
11126 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11127 if (LN0->use_empty())
11128 Combiner.recursivelyDeleteUnusedNodes(LN0);
11129 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11132 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11133 // Only generate vector extloads when 1) they're legal, and 2) they are
11134 // deemed desirable by the target.
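// For example (illustrative), on a target where an i8->i32 sextload is legal:
//   (i32 (sext (i8 (load x)))) --> (i32 (sextload x))
// and any remaining uses of the original i8 load are rewritten as
//   (i8 (truncate (i32 (sextload x)))).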
11135 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
11136 const TargetLowering &TLI, EVT VT,
11137 bool LegalOperations, SDNode *N, SDValue N0,
11138 ISD::LoadExtType ExtLoadType,
11139 ISD::NodeType ExtOpc) {
11140 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
11141 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
11142 ((LegalOperations || VT.isVector() ||
11143 !cast<LoadSDNode>(N0)->isSimple()) &&
11144 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11145 return {};
11147 bool DoXform = true;
11148 SmallVector<SDNode *, 4> SetCCs;
11149 if (!N0.hasOneUse())
11150 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11151 if (VT.isVector())
11152 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
11153 if (!DoXform)
11154 return {};
11156 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11157 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11158 LN0->getBasePtr(), N0.getValueType(),
11159 LN0->getMemOperand());
11160 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11161 // If the load value is used only by N, replace it via CombineTo N.
11162 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11163 Combiner.CombineTo(N, ExtLoad);
11164 if (NoReplaceTrunc) {
11165 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11166 Combiner.recursivelyDeleteUnusedNodes(LN0);
11167 } else {
11168 SDValue Trunc =
11169 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11170 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11172 return SDValue(N, 0); // Return N so it doesn't get rechecked!
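/// Fold ([s|z]ext (masked_load x)) into an extending masked load when the
/// target considers that legal and desirable, extending the pass-through
/// value to the result type as well.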
11175 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
11176 const TargetLowering &TLI, EVT VT,
11177 SDNode *N, SDValue N0,
11178 ISD::LoadExtType ExtLoadType,
11179 ISD::NodeType ExtOpc) {
11180 if (!N0.hasOneUse())
11181 return SDValue();
11183 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
11184 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11185 return SDValue();
11187 if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11188 return SDValue();
11190 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11191 return SDValue();
11193 SDLoc dl(Ld);
11194 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11195 SDValue NewLoad = DAG.getMaskedLoad(
11196 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11197 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11198 ExtLoadType, Ld->isExpandingLoad());
11199 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
11200 return NewLoad;
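/// If the extended operand is a one-use sign-bit test (setcc X, -1, setgt),
/// replace the sext/zext(setcc) with an invert-and-shift of X, e.g.
///   sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
/// when the types match and the target does not object to the shift.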
11203 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
11204 bool LegalOperations) {
11205 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11206 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11208 SDValue SetCC = N->getOperand(0);
11209 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11210 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11211 return SDValue();
11213 SDValue X = SetCC.getOperand(0);
11214 SDValue Ones = SetCC.getOperand(1);
11215 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11216 EVT VT = N->getValueType(0);
11217 EVT XVT = X.getValueType();
11218 // setge X, C is canonicalized to setgt, so we do not need to match that
11219 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11220 // not require the 'not' op.
11221 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11222 // Invert and smear/shift the sign bit:
11223 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11224 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
11225 SDLoc DL(N);
11226 unsigned ShCt = VT.getSizeInBits() - 1;
11227 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11228 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
11229 SDValue NotX = DAG.getNOT(DL, X, VT);
11230 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
11231 auto ShiftOpcode =
11232 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
11233 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
11236 return SDValue();
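/// Combine (sext (setcc ...)). Depending on legality this either produces a
/// vector setcc of the final type directly, widens the compare operands so a
/// wider setcc can be used, or lowers to a select between the extended "true"
/// value and zero.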
11239 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
11240 SDValue N0 = N->getOperand(0);
11241 if (N0.getOpcode() != ISD::SETCC)
11242 return SDValue();
11244 SDValue N00 = N0.getOperand(0);
11245 SDValue N01 = N0.getOperand(1);
11246 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11247 EVT VT = N->getValueType(0);
11248 EVT N00VT = N00.getValueType();
11249 SDLoc DL(N);
11251 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
11252 // the same size as the compared operands. Try to optimize sext(setcc())
11253 // if this is the case.
11254 if (VT.isVector() && !LegalOperations &&
11255 TLI.getBooleanContents(N00VT) ==
11256 TargetLowering::ZeroOrNegativeOneBooleanContent) {
11257 EVT SVT = getSetCCResultType(N00VT);
11259 // If we already have the desired type, don't change it.
11260 if (SVT != N0.getValueType()) {
11261 // We know that the # elements of the results is the same as the
11262 // # elements of the compare (and the # elements of the compare result
11263 // for that matter). Check to see that they are the same size. If so,
11264 // we know that the element size of the sext'd result matches the
11265 // element size of the compare operands.
11266 if (VT.getSizeInBits() == SVT.getSizeInBits())
11267 return DAG.getSetCC(DL, VT, N00, N01, CC);
11269 // If the desired elements are smaller or larger than the source
11270 // elements, we can use a matching integer vector type and then
11271 // truncate/sign extend.
11272 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
11273 if (SVT == MatchingVecType) {
11274 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
11275 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
11279 // Try to eliminate the sext of a setcc by zexting the compare operands.
11280 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
11281 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
11282 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
11283 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11284 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11286 // We have an unsupported narrow vector compare op that would be legal
11287 // if extended to the destination type. See if the compare operands
11288 // can be freely extended to the destination type.
11289 auto IsFreeToExtend = [&](SDValue V) {
11290 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
11291 return true;
11292 // Match a simple, non-extended load that can be converted to a
11293 // legal {z/s}ext-load.
11294 // TODO: Allow widening of an existing {z/s}ext-load?
11295 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
11296 ISD::isUNINDEXEDLoad(V.getNode()) &&
11297 cast<LoadSDNode>(V)->isSimple() &&
11298 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
11299 return false;
11301 // Non-chain users of this value must either be the setcc in this
11302 // sequence or extends that can be folded into the new {z/s}ext-load.
11303 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
11304 UI != UE; ++UI) {
11305 // Skip uses of the chain and the setcc.
11306 SDNode *User = *UI;
11307 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11308 continue;
11309 // Extra users must have exactly the same cast we are about to create.
11310 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11311 // is enhanced similarly.
11312 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11313 return false;
11315 return true;
11318 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11319 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11320 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11321 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11326 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11327 // Here, T can be 1 or -1, depending on the type of the setcc and
11328 // getBooleanContents().
11329 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11331 // To determine the "true" side of the select, we need to know the high bit
11332 // of the value returned by the setcc if it evaluates to true.
11333 // If the type of the setcc is i1, then the true case of the select is just
11334 // sext(i1 1), that is, -1.
11335 // If the type of the setcc is larger (say, i8) then the value of the high
11336 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11337 // of the appropriate width.
11338 SDValue ExtTrueVal = (SetCCWidth == 1)
11339 ? DAG.getAllOnesConstant(DL, VT)
11340 : DAG.getBoolConstant(true, DL, VT, N00VT);
11341 SDValue Zero = DAG.getConstant(0, DL, VT);
11342 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11343 return SCC;
11345 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11346 EVT SetCCVT = getSetCCResultType(N00VT);
11347 // Don't do this transform for i1 because there's a select transform
11348 // that would reverse it.
11349 // TODO: We should not do this transform at all without a target hook
11350 // because a sext is likely cheaper than a select?
11351 if (SetCCVT.getScalarSizeInBits() != 1 &&
11352 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11353 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11354 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11358 return SDValue();
11361 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11362 SDValue N0 = N->getOperand(0);
11363 EVT VT = N->getValueType(0);
11364 SDLoc DL(N);
11366 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11367 return Res;
11369 // fold (sext (sext x)) -> (sext x)
11370 // fold (sext (aext x)) -> (sext x)
11371 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11372 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11374 if (N0.getOpcode() == ISD::TRUNCATE) {
11375 // fold (sext (truncate (load x))) -> (sext (smaller load x))
11376 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11377 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11378 SDNode *oye = N0.getOperand(0).getNode();
11379 if (NarrowLoad.getNode() != N0.getNode()) {
11380 CombineTo(N0.getNode(), NarrowLoad);
11381 // CombineTo deleted the truncate, if needed, but not what's under it.
11382 AddToWorklist(oye);
11384 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11387 // See if the value being truncated is already sign extended. If so, just
11388 // eliminate the trunc/sext pair.
11389 SDValue Op = N0.getOperand(0);
11390 unsigned OpBits = Op.getScalarValueSizeInBits();
11391 unsigned MidBits = N0.getScalarValueSizeInBits();
11392 unsigned DestBits = VT.getScalarSizeInBits();
11393 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11395 if (OpBits == DestBits) {
11396 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
11397       // bits, it is already sign-extended enough and can be used directly.
11398 if (NumSignBits > DestBits-MidBits)
11399 return Op;
11400 } else if (OpBits < DestBits) {
11401 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
11402 // bits, just sext from i32.
11403 if (NumSignBits > OpBits-MidBits)
11404 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11405 } else {
11406 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
11407 // bits, just truncate to i32.
11408 if (NumSignBits > OpBits-MidBits)
11409 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11412 // fold (sext (truncate x)) -> (sextinreg x).
11413 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11414 N0.getValueType())) {
11415 if (OpBits < DestBits)
11416 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11417 else if (OpBits > DestBits)
11418 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11419 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11420 DAG.getValueType(N0.getValueType()));
11424 // Try to simplify (sext (load x)).
11425 if (SDValue foldedExt =
11426 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11427 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11428 return foldedExt;
11430 if (SDValue foldedExt =
11431 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11432 ISD::SIGN_EXTEND))
11433 return foldedExt;
11435 // fold (sext (load x)) to multiple smaller sextloads.
11436 // Only on illegal but splittable vectors.
11437 if (SDValue ExtLoad = CombineExtLoad(N))
11438 return ExtLoad;
11440 // Try to simplify (sext (sextload x)).
11441 if (SDValue foldedExt = tryToFoldExtOfExtload(
11442 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11443 return foldedExt;
11445 // fold (sext (and/or/xor (load x), cst)) ->
11446 // (and/or/xor (sextload x), (sext cst))
11447 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11448 N0.getOpcode() == ISD::XOR) &&
11449 isa<LoadSDNode>(N0.getOperand(0)) &&
11450 N0.getOperand(1).getOpcode() == ISD::Constant &&
11451 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11452 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11453 EVT MemVT = LN00->getMemoryVT();
11454 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11455 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11456 SmallVector<SDNode*, 4> SetCCs;
11457 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11458 ISD::SIGN_EXTEND, SetCCs, TLI);
11459 if (DoXform) {
11460 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11461 LN00->getChain(), LN00->getBasePtr(),
11462 LN00->getMemoryVT(),
11463 LN00->getMemOperand());
11464 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11465 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11466 ExtLoad, DAG.getConstant(Mask, DL, VT));
11467 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11468 bool NoReplaceTruncAnd = !N0.hasOneUse();
11469 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11470 CombineTo(N, And);
11471 // If N0 has multiple uses, change other uses as well.
11472 if (NoReplaceTruncAnd) {
11473 SDValue TruncAnd =
11474 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11475 CombineTo(N0.getNode(), TruncAnd);
11477 if (NoReplaceTrunc) {
11478 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11479 } else {
11480 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11481 LN00->getValueType(0), ExtLoad);
11482 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11484 return SDValue(N,0); // Return N so it doesn't get rechecked!
11489 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11490 return V;
11492 if (SDValue V = foldSextSetcc(N))
11493 return V;
11495 // fold (sext x) -> (zext x) if the sign bit is known zero.
11496 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11497 DAG.SignBitIsZero(N0))
11498 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11500 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11501 return NewVSel;
11503 // Eliminate this sign extend by doing a negation in the destination type:
11504 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11505 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11506 isNullOrNullSplat(N0.getOperand(0)) &&
11507 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11508 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11509 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11510 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11512 // Eliminate this sign extend by doing a decrement in the destination type:
11513 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11514 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11515 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11516 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11517 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11518 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11519 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11522 // fold sext (not i1 X) -> add (zext i1 X), -1
11523 // TODO: This could be extended to handle bool vectors.
11524 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11525 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11526 TLI.isOperationLegal(ISD::ADD, VT)))) {
11527 // If we can eliminate the 'not', the sext form should be better
11528 if (SDValue NewXor = visitXOR(N0.getNode())) {
11529 // Returning N0 is a form of in-visit replacement that may have
11530 // invalidated N0.
11531 if (NewXor.getNode() == N0.getNode()) {
11532 // Return SDValue here as the xor should have already been replaced in
11533 // this sext.
11534 return SDValue();
11535 } else {
11536 // Return a new sext with the new xor.
11537 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11541 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11542 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11545 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11546 return Res;
11548 return SDValue();
11551 // isTruncateOf - If N is a truncate of some other value, return true and record
11552 // the value being truncated in Op and which of Op's bits are zero/one in Known.
11553 // This function computes KnownBits to avoid a duplicated call to
11554 // computeKnownBits in the caller.
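// Note that (setcc X, 0, setne) with an i1 result is also treated as a
// truncate of X to i1 when every bit of X other than bit 0 is known zero.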
11555 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11556 KnownBits &Known) {
11557 if (N->getOpcode() == ISD::TRUNCATE) {
11558 Op = N->getOperand(0);
11559 Known = DAG.computeKnownBits(Op);
11560 return true;
11563 if (N.getOpcode() != ISD::SETCC ||
11564 N.getValueType().getScalarType() != MVT::i1 ||
11565 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11566 return false;
11568 SDValue Op0 = N->getOperand(0);
11569 SDValue Op1 = N->getOperand(1);
11570 assert(Op0.getValueType() == Op1.getValueType());
11572 if (isNullOrNullSplat(Op0))
11573 Op = Op1;
11574 else if (isNullOrNullSplat(Op1))
11575 Op = Op0;
11576 else
11577 return false;
11579 Known = DAG.computeKnownBits(Op);
11581 return (Known.Zero | 1).isAllOnes();
11584 /// Given an extending node with a pop-count operand, if the target does not
11585 /// support a pop-count in the narrow source type but does support it in the
11586 /// destination type, widen the pop-count to the destination type.
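// For example (illustrative), if CTPOP is only legal for i32:
//   (i32 (zext (i16 (ctpop X)))) --> (i32 (ctpop (i32 (zext X))))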
11587 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11588 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11589 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11591 SDValue CtPop = Extend->getOperand(0);
11592 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11593 return SDValue();
11595 EVT VT = Extend->getValueType(0);
11596 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11597 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11598 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11599 return SDValue();
11601 // zext (ctpop X) --> ctpop (zext X)
11602 SDLoc DL(Extend);
11603 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11604 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11607 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11608 SDValue N0 = N->getOperand(0);
11609 EVT VT = N->getValueType(0);
11611 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11612 return Res;
11614 // fold (zext (zext x)) -> (zext x)
11615 // fold (zext (aext x)) -> (zext x)
11616 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11617 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11618 N0.getOperand(0));
11620 // fold (zext (truncate x)) -> (zext x) or
11621 // (zext (truncate x)) -> (truncate x)
11622 // This is valid when the truncated bits of x are already zero.
11623 SDValue Op;
11624 KnownBits Known;
11625 if (isTruncateOf(DAG, N0, Op, Known)) {
11626 APInt TruncatedBits =
11627 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11628 APInt(Op.getScalarValueSizeInBits(), 0) :
11629 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11630 N0.getScalarValueSizeInBits(),
11631 std::min(Op.getScalarValueSizeInBits(),
11632 VT.getScalarSizeInBits()));
11633 if (TruncatedBits.isSubsetOf(Known.Zero))
11634 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11637 // fold (zext (truncate x)) -> (and x, mask)
11638 if (N0.getOpcode() == ISD::TRUNCATE) {
11639 // fold (zext (truncate (load x))) -> (zext (smaller load x))
11640 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11641 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11642 SDNode *oye = N0.getOperand(0).getNode();
11643 if (NarrowLoad.getNode() != N0.getNode()) {
11644 CombineTo(N0.getNode(), NarrowLoad);
11645 // CombineTo deleted the truncate, if needed, but not what's under it.
11646 AddToWorklist(oye);
11648 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11651 EVT SrcVT = N0.getOperand(0).getValueType();
11652 EVT MinVT = N0.getValueType();
11654 // Try to mask before the extension to avoid having to generate a larger mask,
11655 // possibly over several sub-vectors.
11656 if (SrcVT.bitsLT(VT) && VT.isVector()) {
11657 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11658 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11659 SDValue Op = N0.getOperand(0);
11660 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11661 AddToWorklist(Op.getNode());
11662 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11663 // Transfer the debug info; the new node is equivalent to N0.
11664 DAG.transferDbgValues(N0, ZExtOrTrunc);
11665 return ZExtOrTrunc;
11669 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11670 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11671 AddToWorklist(Op.getNode());
11672 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11673 // We may safely transfer the debug info describing the truncate node over
11674 // to the equivalent and operation.
11675 DAG.transferDbgValues(N0, And);
11676 return And;
11680 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11681 // if either of the casts is not free.
11682 if (N0.getOpcode() == ISD::AND &&
11683 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11684 N0.getOperand(1).getOpcode() == ISD::Constant &&
11685 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11686 N0.getValueType()) ||
11687 !TLI.isZExtFree(N0.getValueType(), VT))) {
11688 SDValue X = N0.getOperand(0).getOperand(0);
11689 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11690 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11691 SDLoc DL(N);
11692 return DAG.getNode(ISD::AND, DL, VT,
11693 X, DAG.getConstant(Mask, DL, VT));
11696 // Try to simplify (zext (load x)).
11697 if (SDValue foldedExt =
11698 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11699 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11700 return foldedExt;
11702 if (SDValue foldedExt =
11703 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11704 ISD::ZERO_EXTEND))
11705 return foldedExt;
11707 // fold (zext (load x)) to multiple smaller zextloads.
11708 // Only on illegal but splittable vectors.
11709 if (SDValue ExtLoad = CombineExtLoad(N))
11710 return ExtLoad;
11712 // fold (zext (and/or/xor (load x), cst)) ->
11713 // (and/or/xor (zextload x), (zext cst))
11714 // Unless (and (load x) cst) will match as a zextload already and has
11715 // additional users.
11716 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11717 N0.getOpcode() == ISD::XOR) &&
11718 isa<LoadSDNode>(N0.getOperand(0)) &&
11719 N0.getOperand(1).getOpcode() == ISD::Constant &&
11720 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11721 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11722 EVT MemVT = LN00->getMemoryVT();
11723 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11724 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11725 bool DoXform = true;
11726 SmallVector<SDNode*, 4> SetCCs;
11727 if (!N0.hasOneUse()) {
11728 if (N0.getOpcode() == ISD::AND) {
11729 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11730 EVT LoadResultTy = AndC->getValueType(0);
11731 EVT ExtVT;
11732 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11733 DoXform = false;
11736 if (DoXform)
11737 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11738 ISD::ZERO_EXTEND, SetCCs, TLI);
11739 if (DoXform) {
11740 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11741 LN00->getChain(), LN00->getBasePtr(),
11742 LN00->getMemoryVT(),
11743 LN00->getMemOperand());
11744 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11745 SDLoc DL(N);
11746 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11747 ExtLoad, DAG.getConstant(Mask, DL, VT));
11748 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11749 bool NoReplaceTruncAnd = !N0.hasOneUse();
11750 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11751 CombineTo(N, And);
11752 // If N0 has multiple uses, change other uses as well.
11753 if (NoReplaceTruncAnd) {
11754 SDValue TruncAnd =
11755 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11756 CombineTo(N0.getNode(), TruncAnd);
11758 if (NoReplaceTrunc) {
11759 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11760 } else {
11761 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11762 LN00->getValueType(0), ExtLoad);
11763 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11765 return SDValue(N,0); // Return N so it doesn't get rechecked!
11770 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11771 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11772 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11773 return ZExtLoad;
11775 // Try to simplify (zext (zextload x)).
11776 if (SDValue foldedExt = tryToFoldExtOfExtload(
11777 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11778 return foldedExt;
11780 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11781 return V;
11783 if (N0.getOpcode() == ISD::SETCC) {
11784 // Only do this before legalize for now.
11785 if (!LegalOperations && VT.isVector() &&
11786 N0.getValueType().getVectorElementType() == MVT::i1) {
11787 EVT N00VT = N0.getOperand(0).getValueType();
11788 if (getSetCCResultType(N00VT) == N0.getValueType())
11789 return SDValue();
11791 // We know that the # elements of the results is the same as the #
11792 // elements of the compare (and the # elements of the compare result for
11793 // that matter). Check to see that they are the same size. If so, we know
11794 // that the element size of the sext'd result matches the element size of
11795 // the compare operands.
11796 SDLoc DL(N);
11797 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11798 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11799 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11800 N0.getOperand(1), N0.getOperand(2));
11801 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11804 // If the desired elements are smaller or larger than the source
11805 // elements we can use a matching integer vector type and then
11806 // truncate/any extend followed by zext_in_reg.
11807 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11808 SDValue VsetCC =
11809 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11810 N0.getOperand(1), N0.getOperand(2));
11811 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11812 N0.getValueType());
11815     // zext(setcc x,y,cc) -> zext(select_cc x, y, true, false, cc)
11816 SDLoc DL(N);
11817 EVT N0VT = N0.getValueType();
11818 EVT N00VT = N0.getOperand(0).getValueType();
11819 if (SDValue SCC = SimplifySelectCC(
11820 DL, N0.getOperand(0), N0.getOperand(1),
11821 DAG.getBoolConstant(true, DL, N0VT, N00VT),
11822 DAG.getBoolConstant(false, DL, N0VT, N00VT),
11823 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11824 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11827 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
11828 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11829 isa<ConstantSDNode>(N0.getOperand(1)) &&
11830 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11831 N0.hasOneUse()) {
11832 SDValue ShAmt = N0.getOperand(1);
11833 if (N0.getOpcode() == ISD::SHL) {
11834 SDValue InnerZExt = N0.getOperand(0);
11835 // If the original shl may be shifting out bits, do not perform this
11836 // transformation.
11837 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11838 InnerZExt.getOperand(0).getValueSizeInBits();
11839 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11840 return SDValue();
11843 SDLoc DL(N);
11845 // Ensure that the shift amount is wide enough for the shifted value.
11846 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11847 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11849 return DAG.getNode(N0.getOpcode(), DL, VT,
11850 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11851 ShAmt);
11854 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11855 return NewVSel;
11857 if (SDValue NewCtPop = widenCtPop(N, DAG))
11858 return NewCtPop;
11860 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11861 return Res;
11863 return SDValue();
11866 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11867 SDValue N0 = N->getOperand(0);
11868 EVT VT = N->getValueType(0);
11870 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11871 return Res;
11873 // fold (aext (aext x)) -> (aext x)
11874 // fold (aext (zext x)) -> (zext x)
11875 // fold (aext (sext x)) -> (sext x)
11876 if (N0.getOpcode() == ISD::ANY_EXTEND ||
11877 N0.getOpcode() == ISD::ZERO_EXTEND ||
11878 N0.getOpcode() == ISD::SIGN_EXTEND)
11879 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11881 // fold (aext (truncate (load x))) -> (aext (smaller load x))
11882 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11883 if (N0.getOpcode() == ISD::TRUNCATE) {
11884 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11885 SDNode *oye = N0.getOperand(0).getNode();
11886 if (NarrowLoad.getNode() != N0.getNode()) {
11887 CombineTo(N0.getNode(), NarrowLoad);
11888 // CombineTo deleted the truncate, if needed, but not what's under it.
11889 AddToWorklist(oye);
11891 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11895 // fold (aext (truncate x))
11896 if (N0.getOpcode() == ISD::TRUNCATE)
11897 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11899 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11900 // if the trunc is not free.
11901 if (N0.getOpcode() == ISD::AND &&
11902 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11903 N0.getOperand(1).getOpcode() == ISD::Constant &&
11904 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11905 N0.getValueType())) {
11906 SDLoc DL(N);
11907 SDValue X = N0.getOperand(0).getOperand(0);
11908 X = DAG.getAnyExtOrTrunc(X, DL, VT);
11909 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11910 return DAG.getNode(ISD::AND, DL, VT,
11911 X, DAG.getConstant(Mask, DL, VT));
11914 // fold (aext (load x)) -> (aext (truncate (extload x)))
11915 // None of the supported targets knows how to perform load and any_ext
11916 // on vectors in one instruction, so attempt to fold to zext instead.
11917 if (VT.isVector()) {
11918 // Try to simplify (zext (load x)).
11919 if (SDValue foldedExt =
11920 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11921 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11922 return foldedExt;
11923 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11924 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11925 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11926 bool DoXform = true;
11927 SmallVector<SDNode *, 4> SetCCs;
11928 if (!N0.hasOneUse())
11929 DoXform =
11930 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11931 if (DoXform) {
11932 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11933 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11934 LN0->getChain(), LN0->getBasePtr(),
11935 N0.getValueType(), LN0->getMemOperand());
11936 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11937 // If the load value is used only by N, replace it via CombineTo N.
11938 bool NoReplaceTrunc = N0.hasOneUse();
11939 CombineTo(N, ExtLoad);
11940 if (NoReplaceTrunc) {
11941 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11942 recursivelyDeleteUnusedNodes(LN0);
11943 } else {
11944 SDValue Trunc =
11945 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11946 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11948 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11952 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11953 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11954 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
11955 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11956 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11957 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11958 ISD::LoadExtType ExtType = LN0->getExtensionType();
11959 EVT MemVT = LN0->getMemoryVT();
11960 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11961 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11962 VT, LN0->getChain(), LN0->getBasePtr(),
11963 MemVT, LN0->getMemOperand());
11964 CombineTo(N, ExtLoad);
11965 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11966 recursivelyDeleteUnusedNodes(LN0);
11967 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11971 if (N0.getOpcode() == ISD::SETCC) {
11972 // For vectors:
11973 // aext(setcc) -> vsetcc
11974 // aext(setcc) -> truncate(vsetcc)
11975 // aext(setcc) -> aext(vsetcc)
11976 // Only do this before legalize for now.
11977 if (VT.isVector() && !LegalOperations) {
11978 EVT N00VT = N0.getOperand(0).getValueType();
11979 if (getSetCCResultType(N00VT) == N0.getValueType())
11980 return SDValue();
11982 // We know that the # elements of the results is the same as the
11983 // # elements of the compare (and the # elements of the compare result
11984 // for that matter). Check to see that they are the same size. If so,
11985 // we know that the element size of the sext'd result matches the
11986 // element size of the compare operands.
11987 if (VT.getSizeInBits() == N00VT.getSizeInBits())
11988 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11989 N0.getOperand(1),
11990 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11992 // If the desired elements are smaller or larger than the source
11993 // elements we can use a matching integer vector type and then
11994 // truncate/any extend
11995 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11996 SDValue VsetCC =
11997 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11998 N0.getOperand(1),
11999 cast<CondCodeSDNode>(N0.getOperand(2))->get());
12000 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
12003 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
12004 SDLoc DL(N);
12005 if (SDValue SCC = SimplifySelectCC(
12006 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
12007 DAG.getConstant(0, DL, VT),
12008 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12009 return SCC;
12012 if (SDValue NewCtPop = widenCtPop(N, DAG))
12013 return NewCtPop;
12015 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12016 return Res;
12018 return SDValue();
12021 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
12022 unsigned Opcode = N->getOpcode();
12023 SDValue N0 = N->getOperand(0);
12024 SDValue N1 = N->getOperand(1);
12025 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
12027 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
12028 if (N0.getOpcode() == Opcode &&
12029 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
12030 return N0;
12032 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12033 N0.getOperand(0).getOpcode() == Opcode) {
12034 // We have an assert, truncate, assert sandwich. Make one stronger assert
12035 // by asserting the smallest asserted type on the larger source type.
12036 // This eliminates the later assert:
12037 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
12038 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
12039 SDValue BigA = N0.getOperand(0);
12040 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12041 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12042 "Asserting zero/sign-extended bits to a type larger than the "
12043 "truncated destination does not provide information");
12045 SDLoc DL(N);
12046 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
12047 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
12048 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12049 BigA.getOperand(0), MinAssertVTVal);
12050 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12053 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
12054 // than X, just move the AssertZext in front of the truncate and drop the
12055 // AssertSext.
12056 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12057 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
12058 Opcode == ISD::AssertZext) {
12059 SDValue BigA = N0.getOperand(0);
12060 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12061 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12062 "Asserting zero/sign-extended bits to a type larger than the "
12063 "truncated destination does not provide information");
12065 if (AssertVT.bitsLT(BigA_AssertVT)) {
12066 SDLoc DL(N);
12067 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12068 BigA.getOperand(0), N1);
12069 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12073 return SDValue();
12076 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
12077 SDLoc DL(N);
12079 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
12080 SDValue N0 = N->getOperand(0);
12082 // Fold (assertalign (assertalign x, AL0), AL1) ->
12083 // (assertalign x, max(AL0, AL1))
12084 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
12085 return DAG.getAssertAlign(DL, N0.getOperand(0),
12086 std::max(AL, AAN->getAlign()));
12088 // In rare cases, there are trivial arithmetic ops in the source operands. Sink
12089 // this assert down to the source operands so that those arithmetic ops can be
12090 // exposed to DAG combining.
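// For example, (assertalign (add x, 16), 16) can be rewritten as
// (add (assertalign x, 16), 16), exposing the add to further combines.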
12091 switch (N0.getOpcode()) {
12092 default:
12093 break;
12094 case ISD::ADD:
12095 case ISD::SUB: {
12096 unsigned AlignShift = Log2(AL);
12097 SDValue LHS = N0.getOperand(0);
12098 SDValue RHS = N0.getOperand(1);
12099 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
12100 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12101 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
12102 if (LHSAlignShift < AlignShift)
12103 LHS = DAG.getAssertAlign(DL, LHS, AL);
12104 if (RHSAlignShift < AlignShift)
12105 RHS = DAG.getAssertAlign(DL, RHS, AL);
12106 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
12108 break;
12112 return SDValue();
12115 /// If the result of a load is shifted/masked/truncated to an effectively
12116 /// narrower type, try to transform the load to a narrower type and/or
12117 /// use an extending load.
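/// For example, (i32 (and (load p), 255)) can usually become
/// (i32 (zextload i8 from p)), and (i16 (trunc (i32 (load p)))) can become a
/// plain i16 load, subject to the legality checks below.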
12118 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
12119 unsigned Opc = N->getOpcode();
12121 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
12122 SDValue N0 = N->getOperand(0);
12123 EVT VT = N->getValueType(0);
12124 EVT ExtVT = VT;
12126 // This transformation isn't valid for vector loads.
12127 if (VT.isVector())
12128 return SDValue();
12130 // The ShAmt variable is used to indicate that we've consumed a right
12131 // shift, i.e. we want to narrow the width of the load by skipping the ShAmt
12132 // least significant bits.
12133 unsigned ShAmt = 0;
12134 // A special case is when the least significant bits from the load are masked
12135 // away, but using an AND rather than a right shift. HasShiftedOffset is used
12136 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
12137 // the result.
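// For example, on a little-endian target (and (i32 (load p)), 0xFF00) can
// become (shl (i32 (zextload i8 from p+1)), 8), where ShAmt is 8.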
12138 bool HasShiftedOffset = false;
12139 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT and then
12140 // sign-extending back to VT.
12141 if (Opc == ISD::SIGN_EXTEND_INREG) {
12142 ExtType = ISD::SEXTLOAD;
12143 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12144 } else if (Opc == ISD::SRL) {
12145 // Another special-case: SRL is basically zero-extending a narrower value,
12146 // or it may be shifting a higher subword, half or byte into the lowest
12147 // bits.
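// For example, on a little-endian target (srl (i32 (load p)), 16) can be
// narrowed to (i32 (zextload i16 from p+2)), assuming the extending load is
// legal for the target.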
12149 // Only handle shift with constant shift amount, and the shiftee must be a
12150 // load.
12151 auto *LN = dyn_cast<LoadSDNode>(N0);
12152 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12153 if (!N1C || !LN)
12154 return SDValue();
12155 // If the shift amount is larger than the memory type then we're not
12156 // accessing any of the loaded bytes.
12157 ShAmt = N1C->getZExtValue();
12158 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
12159 if (MemoryWidth <= ShAmt)
12160 return SDValue();
12161 // Attempt to fold away the SRL by using ZEXTLOAD.
12162 ExtType = ISD::ZEXTLOAD;
12163 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12164 // If the original load is a SEXTLOAD then we can't simply replace it with a
12165 // ZEXTLOAD (we could potentially replace it with a narrower SEXTLOAD
12166 // followed by a ZEXT, but that is not handled at the moment).
12167 if (LN->getExtensionType() == ISD::SEXTLOAD)
12168 return SDValue();
12169 } else if (Opc == ISD::AND) {
12170 // An AND with a constant mask is the same as a truncate + zero-extend.
12171 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
12172 if (!AndC)
12173 return SDValue();
12175 const APInt &Mask = AndC->getAPIntValue();
12176 unsigned ActiveBits = 0;
12177 if (Mask.isMask()) {
12178 ActiveBits = Mask.countTrailingOnes();
12179 } else if (Mask.isShiftedMask()) {
12180 ShAmt = Mask.countTrailingZeros();
12181 APInt ShiftedMask = Mask.lshr(ShAmt);
12182 ActiveBits = ShiftedMask.countTrailingOnes();
12183 HasShiftedOffset = true;
12184 } else
12185 return SDValue();
12187 ExtType = ISD::ZEXTLOAD;
12188 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
12191 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
12192 // a right shift. Here we redo some of those checks, to possibly adjust the
12193 // ExtVT even further based on "a masking AND". We could also end up here for
12194 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
12195 // need to be done here as well.
12196 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
12197 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
12198 // Bail out when the SRL has more than one use. This is done for historical
12199 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
12200 // check below? And maybe it could be unprofitable to do the transform when
12201 // the SRL has multiple uses and we get here with Opc!=ISD::SRL?
12202 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
12203 if (!SRL.hasOneUse())
12204 return SDValue();
12206 // Only handle shift with constant shift amount, and the shiftee must be a
12207 // load.
12208 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
12209 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
12210 if (!SRL1C || !LN)
12211 return SDValue();
12213 // If the shift amount is larger than the input type then we're not
12214 // accessing any of the loaded bytes. If the load was a zextload/extload
12215 // then the result of the shift+trunc is zero/undef (handled elsewhere).
12216 ShAmt = SRL1C->getZExtValue();
12217 if (ShAmt >= LN->getMemoryVT().getSizeInBits())
12218 return SDValue();
12220 // Because a SRL must be assumed to *need* to zero-extend the high bits
12221 // (as opposed to anyext the high bits), we can't combine the zextload
12222 // lowering of SRL and an sextload.
12223 if (LN->getExtensionType() == ISD::SEXTLOAD)
12224 return SDValue();
12226 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12227 // Is the shift amount a multiple of size of ExtVT?
12228 if ((ShAmt & (ExtVTBits - 1)) != 0)
12229 return SDValue();
12230 // Is the load width a multiple of size of ExtVT?
12231 if ((SRL.getScalarValueSizeInBits() & (ExtVTBits - 1)) != 0)
12232 return SDValue();
12234 // If the SRL is only used by a masking AND, we may be able to adjust
12235 // the ExtVT to make the AND redundant.
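// For example, for (and (srl (i32 (load p)), 16), 255) an i8 zextload is
// sufficient, even though the SRL alone only implies an i16 load.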
12236 SDNode *Mask = *(SRL->use_begin());
12237 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
12238 isa<ConstantSDNode>(Mask->getOperand(1))) {
12239 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
12240 if (ShiftMask.isMask()) {
12241 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
12242 ShiftMask.countTrailingOnes());
12243 // If the mask is smaller, recompute the type.
12244 if ((ExtVTBits > MaskedVT.getScalarSizeInBits()) &&
12245 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
12246 ExtVT = MaskedVT;
12250 N0 = SRL.getOperand(0);
12253 // If the load is shifted left (and the result isn't shifted back right), we
12254 // can fold a truncate through the shift. The typical scenario is that N
12255 // points at a TRUNCATE here so the attempted fold is:
12256 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
12257 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
12258 unsigned ShLeftAmt = 0;
12259 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12260 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
12261 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
12262 ShLeftAmt = N01->getZExtValue();
12263 N0 = N0.getOperand(0);
12267 // If we haven't found a load, we can't narrow it.
12268 if (!isa<LoadSDNode>(N0))
12269 return SDValue();
12271 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12272 // Reducing the width of a volatile load is illegal. For atomics, we may be
12273 // able to reduce the width provided we never widen again. (see D66309)
12274 if (!LN0->isSimple() ||
12275 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
12276 return SDValue();
12278 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
12279 unsigned LVTStoreBits =
12280 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
12281 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
12282 return LVTStoreBits - EVTStoreBits - ShAmt;
12285 // We need to adjust the pointer to the load by ShAmt bits in order to load
12286 // the correct bytes.
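// For example, narrowing a 4-byte load to the i8 at bit offset 8 needs a byte
// offset of 8/8 == 1 on little-endian targets, but (32 - 8 - 8) / 8 == 2 on
// big-endian targets.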
12287 unsigned PtrAdjustmentInBits =
12288 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
12290 uint64_t PtrOff = PtrAdjustmentInBits / 8;
12291 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
12292 SDLoc DL(LN0);
12293 // The original load itself didn't wrap, so an offset within it doesn't.
12294 SDNodeFlags Flags;
12295 Flags.setNoUnsignedWrap(true);
12296 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
12297 TypeSize::Fixed(PtrOff), DL, Flags);
12298 AddToWorklist(NewPtr.getNode());
12300 SDValue Load;
12301 if (ExtType == ISD::NON_EXTLOAD)
12302 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
12303 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12304 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12305 else
12306 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
12307 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
12308 NewAlign, LN0->getMemOperand()->getFlags(),
12309 LN0->getAAInfo());
12311 // Replace the old load's chain with the new load's chain.
12312 WorklistRemover DeadNodes(*this);
12313 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12315 // Shift the result left, if we've swallowed a left shift.
12316 SDValue Result = Load;
12317 if (ShLeftAmt != 0) {
12318 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
12319 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
12320 ShImmTy = VT;
12321 // If the shift amount is as large as the result size (but, presumably,
12322 // no larger than the source) then the useful bits of the result are
12323 // zero; we can't simply return the shortened shift, because the result
12324 // of that operation is undefined.
12325 if (ShLeftAmt >= VT.getScalarSizeInBits())
12326 Result = DAG.getConstant(0, DL, VT);
12327 else
12328 Result = DAG.getNode(ISD::SHL, DL, VT,
12329 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
12332 if (HasShiftedOffset) {
12333 // We're using a shifted mask, so the load now has an offset. This means
12334 // that the data has been loaded into lower bytes than it would have been
12335 // before, so we need to shl the loaded data into the correct position in the
12336 // register.
12337 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12338 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12339 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12342 // Return the new loaded value.
12343 return Result;
12346 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12347 SDValue N0 = N->getOperand(0);
12348 SDValue N1 = N->getOperand(1);
12349 EVT VT = N->getValueType(0);
12350 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12351 unsigned VTBits = VT.getScalarSizeInBits();
12352 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12354 // sext_in_reg(undef) = 0 because the top bits will all be the same.
12355 if (N0.isUndef())
12356 return DAG.getConstant(0, SDLoc(N), VT);
12358 // fold (sext_in_reg c1) -> c1
12359 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12360 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12362 // If the input is already sign extended, just drop the extension.
12363 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
12364 return N0;
12366 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12367 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12368 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12369 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12370 N1);
12372 // fold (sext_in_reg (sext x)) -> (sext x)
12373 // fold (sext_in_reg (aext x)) -> (sext x)
12374 // if x is small enough or if we know that x has more than 1 sign bit and the
12375 // sign_extend_inreg is extending from one of them.
12376 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12377 SDValue N00 = N0.getOperand(0);
12378 unsigned N00Bits = N00.getScalarValueSizeInBits();
12379 if ((N00Bits <= ExtVTBits ||
12380 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
12381 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12382 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12385 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12386 // if x is small enough or if we know that x has more than 1 sign bit and the
12387 // sign_extend_inreg is extending from one of them.
12388 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12389 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12390 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12391 SDValue N00 = N0.getOperand(0);
12392 unsigned N00Bits = N00.getScalarValueSizeInBits();
12393 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12394 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12395 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12396 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12397 if ((N00Bits == ExtVTBits ||
12398 (!IsZext && (N00Bits < ExtVTBits ||
12399 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
12400 (!LegalOperations ||
12401 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12402 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12405 // fold (sext_in_reg (zext x)) -> (sext x)
12406 // iff we are extending the source sign bit.
12407 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12408 SDValue N00 = N0.getOperand(0);
12409 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12410 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12411 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12414 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12415 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12416 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12418 // fold operands of sext_in_reg based on knowledge that the top bits are not
12419 // demanded.
12420 if (SimplifyDemandedBits(SDValue(N, 0)))
12421 return SDValue(N, 0);
12423 // fold (sext_in_reg (load x)) -> (smaller sextload x)
12424 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12425 if (SDValue NarrowLoad = reduceLoadWidth(N))
12426 return NarrowLoad;
12428 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12429 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12430 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12431 if (N0.getOpcode() == ISD::SRL) {
12432 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12433 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12434 // We can turn this into an SRA iff the input to the SRL is already sign
12435 // extended enough.
12436 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12437 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12438 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12439 N0.getOperand(1));
12443 // fold (sext_inreg (extload x)) -> (sextload x)
12444 // If sextload is not supported by the target, we can only do the combine when
12445 // the load has one use. Doing otherwise can block folding the extload with other
12446 // extends that the target does support.
12447 if (ISD::isEXTLoad(N0.getNode()) &&
12448 ISD::isUNINDEXEDLoad(N0.getNode()) &&
12449 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12450 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12451 N0.hasOneUse()) ||
12452 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12453 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12454 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12455 LN0->getChain(),
12456 LN0->getBasePtr(), ExtVT,
12457 LN0->getMemOperand());
12458 CombineTo(N, ExtLoad);
12459 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12460 AddToWorklist(ExtLoad.getNode());
12461 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12464 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12465 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12466 N0.hasOneUse() &&
12467 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12468 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12469 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12470 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12471 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12472 LN0->getChain(),
12473 LN0->getBasePtr(), ExtVT,
12474 LN0->getMemOperand());
12475 CombineTo(N, ExtLoad);
12476 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12477 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12480 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12481 // ignore it if the masked load is already sign extended
12482 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12483 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12484 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12485 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12486 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12487 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12488 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12489 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12490 CombineTo(N, ExtMaskedLoad);
12491 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12492 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12496 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12497 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12498 if (SDValue(GN0, 0).hasOneUse() &&
12499 ExtVT == GN0->getMemoryVT() &&
12500 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12501 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
12502 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
12504 SDValue ExtLoad = DAG.getMaskedGather(
12505 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12506 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12508 CombineTo(N, ExtLoad);
12509 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12510 AddToWorklist(ExtLoad.getNode());
12511 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12515 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12516 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12517 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12518 N0.getOperand(1), false))
12519 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12522 return SDValue();
12525 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12526 SDValue N0 = N->getOperand(0);
12527 EVT VT = N->getValueType(0);
12529 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12530 if (N0.isUndef())
12531 return DAG.getConstant(0, SDLoc(N), VT);
12533 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12534 return Res;
12536 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12537 return SDValue(N, 0);
12539 return SDValue();
12542 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12543 SDValue N0 = N->getOperand(0);
12544 EVT VT = N->getValueType(0);
12545 EVT SrcVT = N0.getValueType();
12546 bool isLE = DAG.getDataLayout().isLittleEndian();
12548 // noop truncate
12549 if (SrcVT == VT)
12550 return N0;
12552 // fold (truncate (truncate x)) -> (truncate x)
12553 if (N0.getOpcode() == ISD::TRUNCATE)
12554 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12556 // fold (truncate c1) -> c1
12557 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12558 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12559 if (C.getNode() != N)
12560 return C;
12563 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12564 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12565 N0.getOpcode() == ISD::SIGN_EXTEND ||
12566 N0.getOpcode() == ISD::ANY_EXTEND) {
12567 // if the source is smaller than the dest, we still need an extend.
12568 if (N0.getOperand(0).getValueType().bitsLT(VT))
12569 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12570 // if the source is larger than the dest, then we just need the truncate.
12571 if (N0.getOperand(0).getValueType().bitsGT(VT))
12572 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12573 // if the source and dest are the same type, we can drop both the extend
12574 // and the truncate.
12575 return N0.getOperand(0);
12578 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12579 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12580 return SDValue();
12582 // Fold extract-and-trunc into a narrow extract. For example:
12583 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12584 // i32 y = TRUNCATE(i64 x)
12585 // -- becomes --
12586 // v16i8 b = BITCAST (v2i64 val)
12587 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12589 // Note: We only run this optimization after type legalization (which often
12590 // creates this pattern) and before operation legalization after which
12591 // we need to be more careful about the vector instructions that we generate.
12592 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12593 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12594 EVT VecTy = N0.getOperand(0).getValueType();
12595 EVT ExTy = N0.getValueType();
12596 EVT TrTy = N->getValueType(0);
12598 auto EltCnt = VecTy.getVectorElementCount();
12599 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12600 auto NewEltCnt = EltCnt * SizeRatio;
12602 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12603 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12605 SDValue EltNo = N0->getOperand(1);
12606 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12607 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12608 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12610 SDLoc DL(N);
12611 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12612 DAG.getBitcast(NVT, N0.getOperand(0)),
12613 DAG.getVectorIdxConstant(Index, DL));
12617 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12618 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12619 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12620 TLI.isTruncateFree(SrcVT, VT)) {
12621 SDLoc SL(N0);
12622 SDValue Cond = N0.getOperand(0);
12623 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12624 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12625 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12629 // trunc (shl x, K) -> shl (trunc x), K, if K < VT.getScalarSizeInBits()
12630 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12631 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12632 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12633 SDValue Amt = N0.getOperand(1);
12634 KnownBits Known = DAG.computeKnownBits(Amt);
12635 unsigned Size = VT.getScalarSizeInBits();
12636 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
12637 SDLoc SL(N);
12638 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12640 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12641 if (AmtVT != Amt.getValueType()) {
12642 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12643 AddToWorklist(Amt.getNode());
12645 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12649 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12650 return V;
12652 // Attempt to pre-truncate BUILD_VECTOR sources.
12653 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12654 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12655 // Avoid creating illegal types if running after type legalizer.
12656 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12657 SDLoc DL(N);
12658 EVT SVT = VT.getScalarType();
12659 SmallVector<SDValue, 8> TruncOps;
12660 for (const SDValue &Op : N0->op_values()) {
12661 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12662 TruncOps.push_back(TruncOp);
12664 return DAG.getBuildVector(VT, DL, TruncOps);
12667 // Fold a series of buildvector, bitcast, and truncate if possible.
12668 // For example fold
12669 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12670 // (2xi32 (buildvector x, y)).
12671 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12672 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12673 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12674 N0.getOperand(0).hasOneUse()) {
12675 SDValue BuildVect = N0.getOperand(0);
12676 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12677 EVT TruncVecEltTy = VT.getVectorElementType();
12679 // Check that the element types match.
12680 if (BuildVectEltTy == TruncVecEltTy) {
12681 // Now we only need to compute the offset of the truncated elements.
12682 unsigned BuildVecNumElts = BuildVect.getNumOperands();
12683 unsigned TruncVecNumElts = VT.getVectorNumElements();
12684 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12686 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12687 "Invalid number of elements");
12689 SmallVector<SDValue, 8> Opnds;
12690 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12691 Opnds.push_back(BuildVect.getOperand(i));
12693 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12697 // See if we can simplify the input to this truncate through knowledge that
12698 // only the low bits are being used.
12699 // For example "trunc (or (shl x, 8), y)" -> trunc y
12700 // Currently we only perform this optimization on scalars because vectors
12701 // may have different active low bits.
12702 if (!VT.isVector()) {
12703 APInt Mask =
12704 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12705 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12706 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12709 // fold (truncate (load x)) -> (smaller load x)
12710 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12711 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12712 if (SDValue Reduced = reduceLoadWidth(N))
12713 return Reduced;
12715 // Handle the case where the load remains an extending load even
12716 // after truncation.
12717 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12718 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12719 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12720 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12721 VT, LN0->getChain(), LN0->getBasePtr(),
12722 LN0->getMemoryVT(),
12723 LN0->getMemOperand());
12724 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12725 return NewLoad;
12730 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
12731 // where ... are all 'undef'.
12732 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12733 SmallVector<EVT, 8> VTs;
12734 SDValue V;
12735 unsigned Idx = 0;
12736 unsigned NumDefs = 0;
12738 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12739 SDValue X = N0.getOperand(i);
12740 if (!X.isUndef()) {
12741 V = X;
12742 Idx = i;
12743 NumDefs++;
12745 // Stop if more than one member is non-undef.
12746 if (NumDefs > 1)
12747 break;
12749 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12750 VT.getVectorElementType(),
12751 X.getValueType().getVectorElementCount()));
12754 if (NumDefs == 0)
12755 return DAG.getUNDEF(VT);
12757 if (NumDefs == 1) {
12758 assert(V.getNode() && "The single defined operand is empty!");
12759 SmallVector<SDValue, 8> Opnds;
12760 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12761 if (i != Idx) {
12762 Opnds.push_back(DAG.getUNDEF(VTs[i]));
12763 continue;
12765 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12766 AddToWorklist(NV.getNode());
12767 Opnds.push_back(NV);
12769 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12773 // Fold truncate of a bitcast of a vector to an extract of the low vector
12774 // element.
12776 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12777 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12778 SDValue VecSrc = N0.getOperand(0);
12779 EVT VecSrcVT = VecSrc.getValueType();
12780 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12781 (!LegalOperations ||
12782 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12783 SDLoc SL(N);
12785 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12786 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12787 DAG.getVectorIdxConstant(Idx, SL));
12791 // Simplify the operands using demanded-bits information.
12792 if (SimplifyDemandedBits(SDValue(N, 0)))
12793 return SDValue(N, 0);
12795 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12796 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12797 // When the adde's carry is not used.
12798 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12799 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12800 // We only do for addcarry before legalize operation
12801 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12802 TLI.isOperationLegal(N0.getOpcode(), VT))) {
12803 SDLoc SL(N);
12804 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12805 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12806 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12807 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12810 // fold (truncate (extract_subvector(ext x))) ->
12811 // (extract_subvector x)
12812 // TODO: This can be generalized to cover cases where the truncate and extract
12813 // do not fully cancel each other out.
12814 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12815 SDValue N00 = N0.getOperand(0);
12816 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12817 N00.getOpcode() == ISD::ZERO_EXTEND ||
12818 N00.getOpcode() == ISD::ANY_EXTEND) {
12819 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12820 VT.getVectorElementType())
12821 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12822 N00.getOperand(0), N0.getOperand(1));
12826 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12827 return NewVSel;
12829 // Narrow a suitable binary operation with a non-opaque constant operand by
12830 // moving it ahead of the truncate. This is limited to pre-legalization
12831 // because targets may prefer a wider type during later combines and invert
12832 // this transform.
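// For example, (i8 (trunc (add (i32 x), 261))) can become
// (add (i8 (trunc x)), 5), since 261 truncates to 5 in i8.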
12833 switch (N0.getOpcode()) {
12834 case ISD::ADD:
12835 case ISD::SUB:
12836 case ISD::MUL:
12837 case ISD::AND:
12838 case ISD::OR:
12839 case ISD::XOR:
12840 if (!LegalOperations && N0.hasOneUse() &&
12841 (isConstantOrConstantVector(N0.getOperand(0), true) ||
12842 isConstantOrConstantVector(N0.getOperand(1), true))) {
12843 // TODO: We already restricted this to pre-legalization, but for vectors
12844 // we are extra cautious to not create an unsupported operation.
12845 // Target-specific changes are likely needed to avoid regressions here.
12846 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12847 SDLoc DL(N);
12848 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12849 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12850 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12853 break;
12854 case ISD::USUBSAT:
12855 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
12856 // enough to know that the upper bits are zero, we must also ensure that we
12857 // don't introduce an extra truncate.
12858 if (!LegalOperations && N0.hasOneUse() &&
12859 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12860 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12861 VT.getScalarSizeInBits() &&
12862 hasOperation(N0.getOpcode(), VT)) {
12863 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12864 DAG, SDLoc(N));
12866 break;
12869 return SDValue();
12872 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12873 SDValue Elt = N->getOperand(i);
12874 if (Elt.getOpcode() != ISD::MERGE_VALUES)
12875 return Elt.getNode();
12876 return Elt.getOperand(Elt.getResNo()).getNode();
12879 /// build_pair (load, load) -> load
12880 /// if load locations are consecutive.
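/// For example, a BUILD_PAIR of two adjacent i32 loads can usually be replaced
/// by a single i64 load of the lower address (with the operands swapped first
/// on big-endian targets), provided the wide access is legal and fast.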
12881 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12882 assert(N->getOpcode() == ISD::BUILD_PAIR);
12884 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12885 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12887 // A BUILD_PAIR always has the least significant part in elt 0 and the
12888 // most significant part in elt 1. So when combining into one large load, we
12889 // need to consider the endianness.
12890 if (DAG.getDataLayout().isBigEndian())
12891 std::swap(LD1, LD2);
12893 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
12894 !LD1->hasOneUse() || !LD2->hasOneUse() ||
12895 LD1->getAddressSpace() != LD2->getAddressSpace())
12896 return SDValue();
12898 bool LD1Fast = false;
12899 EVT LD1VT = LD1->getValueType(0);
12900 unsigned LD1Bytes = LD1VT.getStoreSize();
12901 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
12902 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
12903 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
12904 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
12905 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12906 LD1->getPointerInfo(), LD1->getAlign());
12908 return SDValue();
12911 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12912 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12913 // and Lo parts; on big-endian machines it doesn't.
12914 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12917 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12918 const TargetLowering &TLI) {
12919 // If this is not a bitcast to an FP type or if the target doesn't have
12920 // IEEE754-compliant FP logic, we're done.
12921 EVT VT = N->getValueType(0);
12922 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12923 return SDValue();
12925 // TODO: Handle cases where the integer constant is a different scalar
12926 // bitwidth to the FP.
12927 SDValue N0 = N->getOperand(0);
12928 EVT SourceVT = N0.getValueType();
12929 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12930 return SDValue();
12932 unsigned FPOpcode;
12933 APInt SignMask;
12934 switch (N0.getOpcode()) {
12935 case ISD::AND:
12936 FPOpcode = ISD::FABS;
12937 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12938 break;
12939 case ISD::XOR:
12940 FPOpcode = ISD::FNEG;
12941 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12942 break;
12943 case ISD::OR:
12944 FPOpcode = ISD::FABS;
12945 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12946 break;
12947 default:
12948 return SDValue();
12951 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12952 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12953 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12954 // fneg (fabs X)
12955 SDValue LogicOp0 = N0.getOperand(0);
12956 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12957 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12958 LogicOp0.getOpcode() == ISD::BITCAST &&
12959 LogicOp0.getOperand(0).getValueType() == VT) {
12960 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12961 NumFPLogicOpsConv++;
12962 if (N0.getOpcode() == ISD::OR)
12963 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12964 return FPOp;
12967 return SDValue();
12970 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12971 SDValue N0 = N->getOperand(0);
12972 EVT VT = N->getValueType(0);
12974 if (N0.isUndef())
12975 return DAG.getUNDEF(VT);
12977 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12978 // Only do this before legalize types, unless both types are integer and the
12979 // scalar type is legal. Only do this before legalize ops, since the target
12980 // may be depending on the bitcast.
12981 // First check to see if this is all constant.
12982 // TODO: Support FP bitcasts after legalize types.
12983 if (VT.isVector() &&
12984 (!LegalTypes ||
12985 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12986 TLI.isTypeLegal(VT.getVectorElementType()))) &&
12987 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12988 cast<BuildVectorSDNode>(N0)->isConstant())
12989 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12990 VT.getVectorElementType());
12992 // If the input is a constant, let getNode fold it.
12993 if (isIntOrFPConstant(N0)) {
12994 // If we can't allow illegal operations, we need to check that this is just
12995 // a fp -> int or int -> fp conversion and that the resulting operation will
12996 // be legal.
12997 if (!LegalOperations ||
12998 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12999 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
13000 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13001 TLI.isOperationLegal(ISD::Constant, VT))) {
13002 SDValue C = DAG.getBitcast(VT, N0);
13003 if (C.getNode() != N)
13004 return C;
13008 // (conv (conv x, t1), t2) -> (conv x, t2)
13009 if (N0.getOpcode() == ISD::BITCAST)
13010 return DAG.getBitcast(VT, N0.getOperand(0));
13012 // fold (conv (load x)) -> (load (conv*)x)
13013 // If the resultant load doesn't need a higher alignment than the original!
13014 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13015 // Do not remove the cast if the types differ in endian layout.
13016 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
13017 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13018 // If the load is volatile, we only want to change the load type if the
13019 // resulting load is legal. Otherwise we might increase the number of
13020 // memory accesses. We don't care if the original type was legal or not
13021 // as we assume software couldn't rely on the number of accesses of an
13022 // illegal type.
13023 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13024 TLI.isOperationLegal(ISD::LOAD, VT))) {
13025 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13027 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13028 *LN0->getMemOperand())) {
13029 SDValue Load =
13030 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13031 LN0->getPointerInfo(), LN0->getAlign(),
13032 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13033 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13034 return Load;
13038 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13039 return V;
13041 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13042 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13044 // For ppc_fp128:
13045 // fold (bitcast (fneg x)) ->
13046 // flipbit = signbit
13047 // (xor (bitcast x) (build_pair flipbit, flipbit))
13049 // fold (bitcast (fabs x)) ->
13050 // flipbit = (and (extract_element (bitcast x), 0), signbit)
13051 // (xor (bitcast x) (build_pair flipbit, flipbit))
13052 // This often reduces constant pool loads.
13053 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13054 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13055 N0.getNode()->hasOneUse() && VT.isInteger() &&
13056 !VT.isVector() && !N0.getValueType().isVector()) {
13057 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13058 AddToWorklist(NewConv.getNode());
13060 SDLoc DL(N);
13061 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13062 assert(VT.getSizeInBits() == 128);
13063 SDValue SignBit = DAG.getConstant(
13064 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
13065 SDValue FlipBit;
13066 if (N0.getOpcode() == ISD::FNEG) {
13067 FlipBit = SignBit;
13068 AddToWorklist(FlipBit.getNode());
13069 } else {
13070 assert(N0.getOpcode() == ISD::FABS);
13071 SDValue Hi =
13072 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
13073 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13074 SDLoc(NewConv)));
13075 AddToWorklist(Hi.getNode());
13076 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13077 AddToWorklist(FlipBit.getNode());
13079 SDValue FlipBits =
13080 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13081 AddToWorklist(FlipBits.getNode());
13082 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13084 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13085 if (N0.getOpcode() == ISD::FNEG)
13086 return DAG.getNode(ISD::XOR, DL, VT,
13087 NewConv, DAG.getConstant(SignBit, DL, VT));
13088 assert(N0.getOpcode() == ISD::FABS);
13089 return DAG.getNode(ISD::AND, DL, VT,
13090 NewConv, DAG.getConstant(~SignBit, DL, VT));
13093 // fold (bitconvert (fcopysign cst, x)) ->
13094 // (or (and (bitconvert x), sign), (and cst, (not sign)))
13095 // Note that we don't handle (copysign x, cst) because this can always be
13096 // folded to an fneg or fabs.
13098 // For ppc_fp128:
13099 // fold (bitcast (fcopysign cst, x)) ->
13100 // flipbit = (and (extract_element
13101 // (xor (bitcast cst), (bitcast x)), 0),
13102 // signbit)
13103 // (xor (bitcast cst) (build_pair flipbit, flipbit))
13104 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
13105 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
13106 VT.isInteger() && !VT.isVector()) {
13107 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
13108 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
13109 if (isTypeLegal(IntXVT)) {
13110 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13111 AddToWorklist(X.getNode());
13113 // If X has a different width than the result/lhs, sext it or truncate it.
13114 unsigned VTWidth = VT.getSizeInBits();
13115 if (OrigXWidth < VTWidth) {
13116 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13117 AddToWorklist(X.getNode());
13118 } else if (OrigXWidth > VTWidth) {
13119 // To get the sign bit in the right place, we have to shift it right
13120 // before truncating.
13121 SDLoc DL(X);
13122 X = DAG.getNode(ISD::SRL, DL,
13123 X.getValueType(), X,
13124 DAG.getConstant(OrigXWidth-VTWidth, DL,
13125 X.getValueType()));
13126 AddToWorklist(X.getNode());
13127 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13128 AddToWorklist(X.getNode());
13131 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13132 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13133 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13134 AddToWorklist(Cst.getNode());
13135 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13136 AddToWorklist(X.getNode());
13137 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13138 AddToWorklist(XorResult.getNode());
13139 SDValue XorResult64 = DAG.getNode(
13140 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
13141 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13142 SDLoc(XorResult)));
13143 AddToWorklist(XorResult64.getNode());
13144 SDValue FlipBit =
13145 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
13146 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13147 AddToWorklist(FlipBit.getNode());
13148 SDValue FlipBits =
13149 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13150 AddToWorklist(FlipBits.getNode());
13151 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13153 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13154 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13155 X, DAG.getConstant(SignBit, SDLoc(X), VT));
13156 AddToWorklist(X.getNode());
13158 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13159 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13160 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13161 AddToWorklist(Cst.getNode());
13163 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13167 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13168 if (N0.getOpcode() == ISD::BUILD_PAIR)
13169 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
13170 return CombineLD;
13172 // Remove double bitcasts from shuffles - this is often a legacy of
13173 // XformToShuffleWithZero being used to combine bitmaskings (of
13174 // float vectors bitcast to integer vectors) into shuffles.
13175 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13176 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13177 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
13178 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
13179 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
13180 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
13182 // If an operand is a bitcast, peek through it if it casts from the original VT.
13183 // If an operand is a constant, just bitcast it back to the original VT.
13184 auto PeekThroughBitcast = [&](SDValue Op) {
13185 if (Op.getOpcode() == ISD::BITCAST &&
13186 Op.getOperand(0).getValueType() == VT)
13187 return SDValue(Op.getOperand(0));
13188 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
13189 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
13190 return DAG.getBitcast(VT, Op);
13191 return SDValue();
13194 // FIXME: If either input vector is bitcast, try to convert the shuffle to
13195 // the result type of this bitcast. This would eliminate at least one
13196 // bitcast. See the transform in InstCombine.
13197 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
13198 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
13199 if (!(SV0 && SV1))
13200 return SDValue();
13202 int MaskScale =
13203 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
13204 SmallVector<int, 8> NewMask;
13205 for (int M : SVN->getMask())
13206 for (int i = 0; i != MaskScale; ++i)
13207 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
13209 SDValue LegalShuffle =
13210 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13211 if (LegalShuffle)
13212 return LegalShuffle;
13215 return SDValue();
13218 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13219 EVT VT = N->getValueType(0);
13220 return CombineConsecutiveLoads(N, VT);
13223 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13224 SDValue N0 = N->getOperand(0);
13226 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13227 return N0;
13229 return SDValue();
13232 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
13233 /// operands. DstEltVT indicates the destination element value type.
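/// For example, bitcasting (v2i16 build_vector 0x1234, 0x5678) to v1i32 yields
/// (v1i32 build_vector 0x56781234) on a little-endian target.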
13234 SDValue DAGCombiner::
13235 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
13236 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
13238 // If this is already the right type, we're done.
13239 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
13241 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
13242 unsigned DstBitSize = DstEltVT.getSizeInBits();
13244 // If this is a conversion of N elements of one type to N elements of another
13245 // type, convert each element. This handles FP<->INT cases.
13246 if (SrcBitSize == DstBitSize) {
13247 SmallVector<SDValue, 8> Ops;
13248 for (SDValue Op : BV->op_values()) {
13249 // If the vector element type is not legal, the BUILD_VECTOR operands
13250 // are promoted and implicitly truncated. Make that explicit here.
13251 if (Op.getValueType() != SrcEltVT)
13252 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
13253 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
13254 AddToWorklist(Ops.back().getNode());
13256 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
13257 BV->getValueType(0).getVectorNumElements());
13258 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
13261 // Otherwise, we're growing or shrinking the elements. To avoid having to
13262 // handle annoying details of growing/shrinking FP values, we convert them to
13263 // int first.
13264 if (SrcEltVT.isFloatingPoint()) {
13265 // Convert the input float vector to an int vector where the elements are the
13266 // same size.
13267 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
13268 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
13269 SrcEltVT = IntVT;
13272 // Now we know the input is an integer vector. If the output is a FP type,
13273 // convert to integer first, then to FP of the right size.
13274 if (DstEltVT.isFloatingPoint()) {
13275 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
13276 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
13278 // Next, convert to FP elements of the same size.
13279 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
13282 // Okay, we know the src/dst types are both integers of differing types.
13283 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
13285 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
13286 // BuildVectorSDNode?
13287 auto *BVN = cast<BuildVectorSDNode>(BV);
13289 // Extract the constant raw bit data.
13290 BitVector UndefElements;
13291 SmallVector<APInt> RawBits;
13292 bool IsLE = DAG.getDataLayout().isLittleEndian();
13293 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
13294 return SDValue();
13296 SDLoc DL(BV);
13297 SmallVector<SDValue, 8> Ops;
13298 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
13299 if (UndefElements[I])
13300 Ops.push_back(DAG.getUNDEF(DstEltVT));
13301 else
13302 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
13305 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
13306 return DAG.getBuildVector(VT, DL, Ops);
13309 // Returns true if floating point contraction is allowed on the FMUL-SDValue
13310 // `N`
13311 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13312 assert(N.getOpcode() == ISD::FMUL);
13314 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13315 N->getFlags().hasAllowContract();
13318 // Returns true if `N` can assume that no infinities are involved in its computation.
13319 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13320 return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
13323 /// Try to perform FMA combining on a given FADD node.
13324 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13325 SDValue N0 = N->getOperand(0);
13326 SDValue N1 = N->getOperand(1);
13327 EVT VT = N->getValueType(0);
13328 SDLoc SL(N);
13330 const TargetOptions &Options = DAG.getTarget().Options;
13332 // Floating-point multiply-add with intermediate rounding.
13333 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13335 // Floating-point multiply-add without intermediate rounding.
13336 bool HasFMA =
13337 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13338 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13340 // No valid opcode, do not combine.
13341 if (!HasFMAD && !HasFMA)
13342 return SDValue();
13344 bool CanReassociate =
13345 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13346 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13347 Options.UnsafeFPMath || HasFMAD);
13348 // If the addition is not contractable, do not combine.
13349 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13350 return SDValue();
13352 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13353 return SDValue();
13355 // Always prefer FMAD to FMA for precision.
13356 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13357 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13359 auto isFusedOp = [&](SDValue N) {
13360 unsigned Opcode = N.getOpcode();
13361 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13364 // Is the node an FMUL and contractable either due to global flags or
13365 // SDNodeFlags.
13366 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13367 if (N.getOpcode() != ISD::FMUL)
13368 return false;
13369 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13371 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13372 // prefer to fold the multiply with fewer uses.
13373 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13374 if (N0.getNode()->use_size() > N1.getNode()->use_size())
13375 std::swap(N0, N1);
13378 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13379 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13380 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13381 N0.getOperand(1), N1);
13384 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13385 // Note: Commutes FADD operands.
13386 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13387 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13388 N1.getOperand(1), N0);
13391 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13392 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13393 // This requires reassociation because it changes the order of operations.
13394 SDValue FMA, E;
13395 if (CanReassociate && isFusedOp(N0) &&
13396 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13397 N0.getOperand(2).hasOneUse()) {
13398 FMA = N0;
13399 E = N1;
13400 } else if (CanReassociate && isFusedOp(N1) &&
13401 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13402 N1.getOperand(2).hasOneUse()) {
13403 FMA = N1;
13404 E = N0;
13406 if (FMA && E) {
13407 SDValue A = FMA.getOperand(0);
13408 SDValue B = FMA.getOperand(1);
13409 SDValue C = FMA.getOperand(2).getOperand(0);
13410 SDValue D = FMA.getOperand(2).getOperand(1);
13411 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13412 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13415 // Look through FP_EXTEND nodes to do more combining.
13417 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13418 if (N0.getOpcode() == ISD::FP_EXTEND) {
13419 SDValue N00 = N0.getOperand(0);
13420 if (isContractableFMUL(N00) &&
13421 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13422 N00.getValueType())) {
13423 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13424 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13425 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13426 N1);
13430 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13431 // Note: Commutes FADD operands.
13432 if (N1.getOpcode() == ISD::FP_EXTEND) {
13433 SDValue N10 = N1.getOperand(0);
13434 if (isContractableFMUL(N10) &&
13435 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13436 N10.getValueType())) {
13437 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13438 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13439 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13440 N0);
13444 // More folding opportunities when target permits.
13445 if (Aggressive) {
13446 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13447 // -> (fma x, y, (fma (fpext u), (fpext v), z))
13448 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13449 SDValue Z) {
13450 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13451 DAG.getNode(PreferredFusedOpcode, SL, VT,
13452 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13453 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13454 Z));
13456 if (isFusedOp(N0)) {
13457 SDValue N02 = N0.getOperand(2);
13458 if (N02.getOpcode() == ISD::FP_EXTEND) {
13459 SDValue N020 = N02.getOperand(0);
13460 if (isContractableFMUL(N020) &&
13461 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13462 N020.getValueType())) {
13463 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13464 N020.getOperand(0), N020.getOperand(1),
13465 N1);
13470 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13471 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13472 // FIXME: This turns two single-precision and one double-precision
13473 // operation into two double-precision operations, which might not be
13474 // interesting for all targets, especially GPUs.
13475 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13476 SDValue Z) {
13477 return DAG.getNode(
13478 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13479 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13480 DAG.getNode(PreferredFusedOpcode, SL, VT,
13481 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13482 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13484 if (N0.getOpcode() == ISD::FP_EXTEND) {
13485 SDValue N00 = N0.getOperand(0);
13486 if (isFusedOp(N00)) {
13487 SDValue N002 = N00.getOperand(2);
13488 if (isContractableFMUL(N002) &&
13489 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13490 N00.getValueType())) {
13491 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13492 N002.getOperand(0), N002.getOperand(1),
13493 N1);
13498 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13499 // -> (fma y, z, (fma (fpext u), (fpext v), x))
13500 if (isFusedOp(N1)) {
13501 SDValue N12 = N1.getOperand(2);
13502 if (N12.getOpcode() == ISD::FP_EXTEND) {
13503 SDValue N120 = N12.getOperand(0);
13504 if (isContractableFMUL(N120) &&
13505 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13506 N120.getValueType())) {
13507 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13508 N120.getOperand(0), N120.getOperand(1),
13509 N0);
13514 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13515 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13516 // FIXME: This turns two single-precision and one double-precision
13517 // operation into two double-precision operations, which might not be
13518 // interesting for all targets, especially GPUs.
13519 if (N1.getOpcode() == ISD::FP_EXTEND) {
13520 SDValue N10 = N1.getOperand(0);
13521 if (isFusedOp(N10)) {
13522 SDValue N102 = N10.getOperand(2);
13523 if (isContractableFMUL(N102) &&
13524 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13525 N10.getValueType())) {
13526 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13527 N102.getOperand(0), N102.getOperand(1),
13528 N0);
13534 return SDValue();
13537 /// Try to perform FMA combining on a given FSUB node.
13538 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13539 SDValue N0 = N->getOperand(0);
13540 SDValue N1 = N->getOperand(1);
13541 EVT VT = N->getValueType(0);
13542 SDLoc SL(N);
13544 const TargetOptions &Options = DAG.getTarget().Options;
13545 // Floating-point multiply-add with intermediate rounding.
13546 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13548 // Floating-point multiply-add without intermediate rounding.
13549 bool HasFMA =
13550 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13551 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13553 // No valid opcode, do not combine.
13554 if (!HasFMAD && !HasFMA)
13555 return SDValue();
13557 const SDNodeFlags Flags = N->getFlags();
13558 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13559 Options.UnsafeFPMath || HasFMAD);
13561 // If the subtraction is not contractable, do not combine.
13562 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13563 return SDValue();
13565 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13566 return SDValue();
13568 // Always prefer FMAD to FMA for precision.
13569 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13570 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13571 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13573 // Is the node an FMUL and contractable either due to global flags or
13574 // SDNodeFlags.
13575 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13576 if (N.getOpcode() != ISD::FMUL)
13577 return false;
13578 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13581 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13582 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13583 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13584 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13585 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13587 return SDValue();
13590 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13591 // Note: Commutes FSUB operands.
13592 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13593 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13594 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13595 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13596 YZ.getOperand(1), X);
13598 return SDValue();
13601 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13602 // prefer to fold the multiply with fewer uses.
13603 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13604 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13605 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13606 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13607 return V;
13608 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13609 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13610 return V;
13611 } else {
13612 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13613 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13614 return V;
13615 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13616 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13617 return V;
13620 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13621 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13622 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13623 SDValue N00 = N0.getOperand(0).getOperand(0);
13624 SDValue N01 = N0.getOperand(0).getOperand(1);
13625 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13626 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13627 DAG.getNode(ISD::FNEG, SL, VT, N1));
13630 // Look through FP_EXTEND nodes to do more combining.
13632 // fold (fsub (fpext (fmul x, y)), z)
13633 // -> (fma (fpext x), (fpext y), (fneg z))
13634 if (N0.getOpcode() == ISD::FP_EXTEND) {
13635 SDValue N00 = N0.getOperand(0);
13636 if (isContractableFMUL(N00) &&
13637 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13638 N00.getValueType())) {
13639 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13640 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13641 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13642 DAG.getNode(ISD::FNEG, SL, VT, N1));
13646 // fold (fsub x, (fpext (fmul y, z)))
13647 // -> (fma (fneg (fpext y)), (fpext z), x)
13648 // Note: Commutes FSUB operands.
13649 if (N1.getOpcode() == ISD::FP_EXTEND) {
13650 SDValue N10 = N1.getOperand(0);
13651 if (isContractableFMUL(N10) &&
13652 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13653 N10.getValueType())) {
13654 return DAG.getNode(
13655 PreferredFusedOpcode, SL, VT,
13656 DAG.getNode(ISD::FNEG, SL, VT,
13657 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13658 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13662 // fold (fsub (fpext (fneg (fmul x, y))), z)
13663 // -> (fneg (fma (fpext x), (fpext y), z))
13664 // Note: This could be removed with appropriate canonicalization of the
13665 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13666 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
13667 // from implementing the canonicalization in visitFSUB.
13668 if (N0.getOpcode() == ISD::FP_EXTEND) {
13669 SDValue N00 = N0.getOperand(0);
13670 if (N00.getOpcode() == ISD::FNEG) {
13671 SDValue N000 = N00.getOperand(0);
13672 if (isContractableFMUL(N000) &&
13673 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13674 N00.getValueType())) {
13675 return DAG.getNode(
13676 ISD::FNEG, SL, VT,
13677 DAG.getNode(PreferredFusedOpcode, SL, VT,
13678 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13679 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13680 N1));
13685 // fold (fsub (fneg (fpext (fmul x, y))), z)
13686 // -> (fneg (fma (fpext x), (fpext y), z))
13687 // Note: This could be removed with appropriate canonicalization of the
13688 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13689 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
13690 // from implementing the canonicalization in visitFSUB.
13691 if (N0.getOpcode() == ISD::FNEG) {
13692 SDValue N00 = N0.getOperand(0);
13693 if (N00.getOpcode() == ISD::FP_EXTEND) {
13694 SDValue N000 = N00.getOperand(0);
13695 if (isContractableFMUL(N000) &&
13696 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13697 N000.getValueType())) {
13698 return DAG.getNode(
13699 ISD::FNEG, SL, VT,
13700 DAG.getNode(PreferredFusedOpcode, SL, VT,
13701 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13702 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13703 N1));
13708 auto isReassociable = [Options](SDNode *N) {
13709 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13712 auto isContractableAndReassociableFMUL = [isContractableFMUL,
13713 isReassociable](SDValue N) {
13714 return isContractableFMUL(N) && isReassociable(N.getNode());
13717 auto isFusedOp = [&](SDValue N) {
13718 unsigned Opcode = N.getOpcode();
13719 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13722 // More folding opportunities when target permits.
13723 if (Aggressive && isReassociable(N)) {
13724 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13725 // fold (fsub (fma x, y, (fmul u, v)), z)
13726 // -> (fma x, y, (fma u, v, (fneg z)))
13727 if (CanFuse && isFusedOp(N0) &&
13728 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13729 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13730 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13731 N0.getOperand(1),
13732 DAG.getNode(PreferredFusedOpcode, SL, VT,
13733 N0.getOperand(2).getOperand(0),
13734 N0.getOperand(2).getOperand(1),
13735 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13738 // fold (fsub x, (fma y, z, (fmul u, v)))
13739 // -> (fma (fneg y), z, (fma (fneg u), v, x))
13740 if (CanFuse && isFusedOp(N1) &&
13741 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13742 N1->hasOneUse() && NoSignedZero) {
13743 SDValue N20 = N1.getOperand(2).getOperand(0);
13744 SDValue N21 = N1.getOperand(2).getOperand(1);
13745 return DAG.getNode(
13746 PreferredFusedOpcode, SL, VT,
13747 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13748 DAG.getNode(PreferredFusedOpcode, SL, VT,
13749 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13752 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13753 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13754 if (isFusedOp(N0) && N0->hasOneUse()) {
13755 SDValue N02 = N0.getOperand(2);
13756 if (N02.getOpcode() == ISD::FP_EXTEND) {
13757 SDValue N020 = N02.getOperand(0);
13758 if (isContractableAndReassociableFMUL(N020) &&
13759 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13760 N020.getValueType())) {
13761 return DAG.getNode(
13762 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13763 DAG.getNode(
13764 PreferredFusedOpcode, SL, VT,
13765 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13766 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13767 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13772 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13773 // -> (fma (fpext x), (fpext y),
13774 // (fma (fpext u), (fpext v), (fneg z)))
13775 // FIXME: This turns two single-precision and one double-precision
13776 // operation into two double-precision operations, which might not be
13777 // interesting for all targets, especially GPUs.
13778 if (N0.getOpcode() == ISD::FP_EXTEND) {
13779 SDValue N00 = N0.getOperand(0);
13780 if (isFusedOp(N00)) {
13781 SDValue N002 = N00.getOperand(2);
13782 if (isContractableAndReassociableFMUL(N002) &&
13783 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13784 N00.getValueType())) {
13785 return DAG.getNode(
13786 PreferredFusedOpcode, SL, VT,
13787 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13788 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13789 DAG.getNode(
13790 PreferredFusedOpcode, SL, VT,
13791 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13792 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13793 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13798 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13799 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13800 if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13801 N1->hasOneUse()) {
13802 SDValue N120 = N1.getOperand(2).getOperand(0);
13803 if (isContractableAndReassociableFMUL(N120) &&
13804 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13805 N120.getValueType())) {
13806 SDValue N1200 = N120.getOperand(0);
13807 SDValue N1201 = N120.getOperand(1);
13808 return DAG.getNode(
13809 PreferredFusedOpcode, SL, VT,
13810 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13811 DAG.getNode(PreferredFusedOpcode, SL, VT,
13812 DAG.getNode(ISD::FNEG, SL, VT,
13813 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13814 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13818 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13819 // -> (fma (fneg (fpext y)), (fpext z),
13820 // (fma (fneg (fpext u)), (fpext v), x))
13821 // FIXME: This turns two single-precision and one double-precision
13822 // operation into two double-precision operations, which might not be
13823 // interesting for all targets, especially GPUs.
13824 if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
13825 SDValue CvtSrc = N1.getOperand(0);
13826 SDValue N100 = CvtSrc.getOperand(0);
13827 SDValue N101 = CvtSrc.getOperand(1);
13828 SDValue N102 = CvtSrc.getOperand(2);
13829 if (isContractableAndReassociableFMUL(N102) &&
13830 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13831 CvtSrc.getValueType())) {
13832 SDValue N1020 = N102.getOperand(0);
13833 SDValue N1021 = N102.getOperand(1);
13834 return DAG.getNode(
13835 PreferredFusedOpcode, SL, VT,
13836 DAG.getNode(ISD::FNEG, SL, VT,
13837 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13838 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13839 DAG.getNode(PreferredFusedOpcode, SL, VT,
13840 DAG.getNode(ISD::FNEG, SL, VT,
13841 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13842 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13847 return SDValue();
13850 /// Try to perform FMA combining on a given FMUL node based on the distributive
13851 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13852 /// subtraction instead of addition).
13853 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13854 SDValue N0 = N->getOperand(0);
13855 SDValue N1 = N->getOperand(1);
13856 EVT VT = N->getValueType(0);
13857 SDLoc SL(N);
13859 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13861 const TargetOptions &Options = DAG.getTarget().Options;
13863 // The transforms below are incorrect when x == 0 and y == inf, because the
13864 // intermediate multiplication produces a nan.
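// For example, for (fmul (fadd x, 1.0), y) with x = 0.0 and y = +inf: the
// unfused form computes (0.0 + 1.0) * +inf = +inf, while the fused form
// (fma x, y, y) computes 0.0 * +inf + +inf = NaN.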
13865 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
13866 if (!hasNoInfs(Options, FAdd))
13867 return SDValue();
13869 // Floating-point multiply-add without intermediate rounding.
13870 bool HasFMA =
13871 isContractableFMUL(Options, SDValue(N, 0)) &&
13872 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13873 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13875 // Floating-point multiply-add with intermediate rounding. This can result
13876 // in a less precise result due to the changed rounding order.
13877 bool HasFMAD = Options.UnsafeFPMath &&
13878 (LegalOperations && TLI.isFMADLegal(DAG, N));
13880 // No valid opcode, do not combine.
13881 if (!HasFMAD && !HasFMA)
13882 return SDValue();
13884 // Always prefer FMAD to FMA for precision.
13885 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13886 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13888 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13889 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13890 auto FuseFADD = [&](SDValue X, SDValue Y) {
13891 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13892 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13893 if (C->isExactlyValue(+1.0))
13894 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13896 if (C->isExactlyValue(-1.0))
13897 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13898 DAG.getNode(ISD::FNEG, SL, VT, Y));
13901 return SDValue();
13904 if (SDValue FMA = FuseFADD(N0, N1))
13905 return FMA;
13906 if (SDValue FMA = FuseFADD(N1, N0))
13907 return FMA;
13909 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13910 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13911 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13912 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
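// These follow from distributing y, e.g. (fsub +1.0, x1) * y == y - x1 * y
// == (fma (fneg x1), y, y).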
13913 auto FuseFSUB = [&](SDValue X, SDValue Y) {
13914 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13915 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13916 if (C0->isExactlyValue(+1.0))
13917 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13918 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13920 if (C0->isExactlyValue(-1.0))
13921 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13922 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13923 DAG.getNode(ISD::FNEG, SL, VT, Y));
13925 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13926 if (C1->isExactlyValue(+1.0))
13927 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13928 DAG.getNode(ISD::FNEG, SL, VT, Y));
13929 if (C1->isExactlyValue(-1.0))
13930 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13934 return SDValue();
13937 if (SDValue FMA = FuseFSUB(N0, N1))
13938 return FMA;
13939 if (SDValue FMA = FuseFSUB(N1, N0))
13940 return FMA;
13942 return SDValue();
13945 SDValue DAGCombiner::visitFADD(SDNode *N) {
13946 SDValue N0 = N->getOperand(0);
13947 SDValue N1 = N->getOperand(1);
13948 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13949 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13950 EVT VT = N->getValueType(0);
13951 SDLoc DL(N);
13952 const TargetOptions &Options = DAG.getTarget().Options;
13953 SDNodeFlags Flags = N->getFlags();
13954 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13956 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13957 return R;
13959 // fold (fadd c1, c2) -> c1 + c2
13960 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
13961 return C;
13963 // canonicalize constant to RHS
13964 if (N0CFP && !N1CFP)
13965 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13967 // fold vector ops
13968 if (VT.isVector())
13969 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
13970 return FoldedVOp;
13972 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13973 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13974 if (N1C && N1C->isZero())
13975 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
13976 return N0;
13978 if (SDValue NewSel = foldBinOpIntoSelect(N))
13979 return NewSel;
13981 // fold (fadd A, (fneg B)) -> (fsub A, B)
13982 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13983 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13984 N1, DAG, LegalOperations, ForCodeSize))
13985 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13987 // fold (fadd (fneg A), B) -> (fsub B, A)
13988 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13989 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13990 N0, DAG, LegalOperations, ForCodeSize))
13991 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13993 auto isFMulNegTwo = [](SDValue FMul) {
13994 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13995 return false;
13996 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13997 return C && C->isExactlyValue(-2.0);
14000 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
14001 if (isFMulNegTwo(N0)) {
14002 SDValue B = N0.getOperand(0);
14003 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14004 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
14006 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
14007 if (isFMulNegTwo(N1)) {
14008 SDValue B = N1.getOperand(0);
14009 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14010 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
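// In both cases A + B * -2.0 is rewritten as A - (B + B), trading the
// multiply by a constant for an extra add.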
14013 // No FP constant should be created after legalization, as the Instruction
14014 // Selection pass has a hard time dealing with FP constants.
14015 bool AllowNewConst = (Level < AfterLegalizeDAG);
14017 // If nnan is enabled, fold lots of things.
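// (nnan is needed because x = +/-inf or NaN would make (fneg x) + x produce
// NaN rather than zero.)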
14018 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
14019 // If allowed, fold (fadd (fneg x), x) -> 0.0
14020 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
14021 return DAG.getConstantFP(0.0, DL, VT);
14023 // If allowed, fold (fadd x, (fneg x)) -> 0.0
14024 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
14025 return DAG.getConstantFP(0.0, DL, VT);
14028 // If 'unsafe math' is enabled, or if we have both reassoc and nsz, fold lots of things.
14029 // TODO: break out portions of the transformations below for which Unsafe is
14030 // considered and which do not require both nsz and reassoc.
14031 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14032 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14033 AllowNewConst) {
14034 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
14035 if (N1CFP && N0.getOpcode() == ISD::FADD &&
14036 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14037 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
14038 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
14041 // We can fold chains of FADD's of the same value into multiplications.
14042 // This transform is not safe in general because we are reducing the number
14043 // of rounding steps.
14044 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
14045 if (N0.getOpcode() == ISD::FMUL) {
14046 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14047 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
14049 // (fadd (fmul x, c), x) -> (fmul x, c+1)
14050 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
14051 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14052 DAG.getConstantFP(1.0, DL, VT));
14053 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
14056 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
14057 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
14058 N1.getOperand(0) == N1.getOperand(1) &&
14059 N0.getOperand(0) == N1.getOperand(0)) {
14060 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14061 DAG.getConstantFP(2.0, DL, VT));
14062 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
14066 if (N1.getOpcode() == ISD::FMUL) {
14067 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14068 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
14070 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
14071 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
14072 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14073 DAG.getConstantFP(1.0, DL, VT));
14074 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
14077 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
14078 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
14079 N0.getOperand(0) == N0.getOperand(1) &&
14080 N1.getOperand(0) == N0.getOperand(0)) {
14081 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14082 DAG.getConstantFP(2.0, DL, VT));
14083 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
14087 if (N0.getOpcode() == ISD::FADD) {
14088 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14089 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
14090 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
14091 (N0.getOperand(0) == N1)) {
14092 return DAG.getNode(ISD::FMUL, DL, VT, N1,
14093 DAG.getConstantFP(3.0, DL, VT));
14097 if (N1.getOpcode() == ISD::FADD) {
14098 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14099 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
14100 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
14101 N1.getOperand(0) == N0) {
14102 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14103 DAG.getConstantFP(3.0, DL, VT));
14107 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
14108 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
14109 N0.getOperand(0) == N0.getOperand(1) &&
14110 N1.getOperand(0) == N1.getOperand(1) &&
14111 N0.getOperand(0) == N1.getOperand(0)) {
14112 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
14113 DAG.getConstantFP(4.0, DL, VT));
14116 } // enable-unsafe-fp-math
14118 // FADD -> FMA combines:
14119 if (SDValue Fused = visitFADDForFMACombine(N)) {
14120 AddToWorklist(Fused.getNode());
14121 return Fused;
14123 return SDValue();
14126 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14127 SDValue Chain = N->getOperand(0);
14128 SDValue N0 = N->getOperand(1);
14129 SDValue N1 = N->getOperand(2);
14130 EVT VT = N->getValueType(0);
14131 EVT ChainVT = N->getValueType(1);
14132 SDLoc DL(N);
14133 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14135 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
14136 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14137 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14138 N1, DAG, LegalOperations, ForCodeSize)) {
14139 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14140 {Chain, N0, NegN1});
14143 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14144 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14145 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14146 N0, DAG, LegalOperations, ForCodeSize)) {
14147 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14148 {Chain, N1, NegN0});
14150 return SDValue();
14153 SDValue DAGCombiner::visitFSUB(SDNode *N) {
14154 SDValue N0 = N->getOperand(0);
14155 SDValue N1 = N->getOperand(1);
14156 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
14157 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14158 EVT VT = N->getValueType(0);
14159 SDLoc DL(N);
14160 const TargetOptions &Options = DAG.getTarget().Options;
14161 const SDNodeFlags Flags = N->getFlags();
14162 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14164 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14165 return R;
14167 // fold (fsub c1, c2) -> c1-c2
14168 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14169 return C;
14171 // fold vector ops
14172 if (VT.isVector())
14173 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14174 return FoldedVOp;
14176 if (SDValue NewSel = foldBinOpIntoSelect(N))
14177 return NewSel;
14179 // (fsub A, 0) -> A
14180 if (N1CFP && N1CFP->isZero()) {
14181 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14182 Flags.hasNoSignedZeros()) {
14183 return N0;
14187 if (N0 == N1) {
14188 // (fsub x, x) -> 0.0
14189 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14190 return DAG.getConstantFP(0.0f, DL, VT);
14193 // (fsub -0.0, N1) -> -N1
14194 if (N0CFP && N0CFP->isZero()) {
14195 if (N0CFP->isNegative() ||
14196 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14197 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14198 // flushed to zero, unless all users treat denorms as zero (DAZ).
14199 // FIXME: This transform will change the sign of a NaN and the behavior
14200 // of a signaling NaN. It is only valid when a NoNaN flag is present.
14201 DenormalMode DenormMode = DAG.getDenormalMode(VT);
14202 if (DenormMode == DenormalMode::getIEEE()) {
14203 if (SDValue NegN1 =
14204 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14205 return NegN1;
14206 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14207 return DAG.getNode(ISD::FNEG, DL, VT, N1);
14212 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14213 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14214 N1.getOpcode() == ISD::FADD) {
14215 // X - (X + Y) -> -Y
14216 if (N0 == N1->getOperand(0))
14217 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14218 // X - (Y + X) -> -Y
14219 if (N0 == N1->getOperand(1))
14220 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14223 // fold (fsub A, (fneg B)) -> (fadd A, B)
14224 if (SDValue NegN1 =
14225 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14226 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14228 // FSUB -> FMA combines:
14229 if (SDValue Fused = visitFSUBForFMACombine(N)) {
14230 AddToWorklist(Fused.getNode());
14231 return Fused;
14234 return SDValue();
14237 SDValue DAGCombiner::visitFMUL(SDNode *N) {
14238 SDValue N0 = N->getOperand(0);
14239 SDValue N1 = N->getOperand(1);
14240 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14241 EVT VT = N->getValueType(0);
14242 SDLoc DL(N);
14243 const TargetOptions &Options = DAG.getTarget().Options;
14244 const SDNodeFlags Flags = N->getFlags();
14245 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14247 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14248 return R;
14250 // fold (fmul c1, c2) -> c1*c2
14251 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
14252 return C;
14254 // canonicalize constant to RHS
14255 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14256 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14257 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
14259 // fold vector ops
14260 if (VT.isVector())
14261 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14262 return FoldedVOp;
14264 if (SDValue NewSel = foldBinOpIntoSelect(N))
14265 return NewSel;
14267 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
14268 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
14269 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14270 N0.getOpcode() == ISD::FMUL) {
14271 SDValue N00 = N0.getOperand(0);
14272 SDValue N01 = N0.getOperand(1);
14273 // Avoid an infinite loop by making sure that N00 is not a constant
14274 // (the inner multiply has not been constant folded yet).
14275 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
14276 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
14277 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
14278 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
14282 // Match a special case: we convert X * 2.0 into fadd.
14283 // fmul (fadd X, X), C -> fmul X, 2.0 * C
14284 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
14285 N0.getOperand(0) == N0.getOperand(1)) {
14286 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14287 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14288 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14292 // fold (fmul X, 2.0) -> (fadd X, X)
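// (X + X is exactly 2.0 * X for every input, including infinities and NaNs,
// so no fast-math flags are required for this fold.)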
14293 if (N1CFP && N1CFP->isExactlyValue(+2.0))
14294 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14296 // fold (fmul X, -1.0) -> (fsub -0.0, X)
14297 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14298 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14299 return DAG.getNode(ISD::FSUB, DL, VT,
14300 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14304 // -N0 * -N1 --> N0 * N1
14305 TargetLowering::NegatibleCost CostN0 =
14306 TargetLowering::NegatibleCost::Expensive;
14307 TargetLowering::NegatibleCost CostN1 =
14308 TargetLowering::NegatibleCost::Expensive;
14309 SDValue NegN0 =
14310 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14311 SDValue NegN1 =
14312 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14313 if (NegN0 && NegN1 &&
14314 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14315 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14316 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14318 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14319 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14320 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14321 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14322 TLI.isOperationLegal(ISD::FABS, VT)) {
14323 SDValue Select = N0, X = N1;
14324 if (Select.getOpcode() != ISD::SELECT)
14325 std::swap(Select, X);
14327 SDValue Cond = Select.getOperand(0);
14328 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14329 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14331 if (TrueOpnd && FalseOpnd &&
14332 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14333 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14334 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14335 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14336 switch (CC) {
14337 default: break;
14338 case ISD::SETOLT:
14339 case ISD::SETULT:
14340 case ISD::SETOLE:
14341 case ISD::SETULE:
14342 case ISD::SETLT:
14343 case ISD::SETLE:
14344 std::swap(TrueOpnd, FalseOpnd);
14345 LLVM_FALLTHROUGH;
14346 case ISD::SETOGT:
14347 case ISD::SETUGT:
14348 case ISD::SETOGE:
14349 case ISD::SETUGE:
14350 case ISD::SETGT:
14351 case ISD::SETGE:
14352 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14353 TLI.isOperationLegal(ISD::FNEG, VT))
14354 return DAG.getNode(ISD::FNEG, DL, VT,
14355 DAG.getNode(ISD::FABS, DL, VT, X));
14356 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14357 return DAG.getNode(ISD::FABS, DL, VT, X);
14359 break;
14364 // FMUL -> FMA combines:
14365 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14366 AddToWorklist(Fused.getNode());
14367 return Fused;
14370 return SDValue();
14373 SDValue DAGCombiner::visitFMA(SDNode *N) {
14374 SDValue N0 = N->getOperand(0);
14375 SDValue N1 = N->getOperand(1);
14376 SDValue N2 = N->getOperand(2);
14377 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14378 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14379 EVT VT = N->getValueType(0);
14380 SDLoc DL(N);
14381 const TargetOptions &Options = DAG.getTarget().Options;
14382 // FMA nodes have flags that propagate to the created nodes.
14383 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14385 bool UnsafeFPMath =
14386 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14388 // Constant fold FMA.
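// (Re-creating the node is enough here: getNode() itself constant-folds an
// FMA whose three operands are all ConstantFP.)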
14389 if (isa<ConstantFPSDNode>(N0) &&
14390 isa<ConstantFPSDNode>(N1) &&
14391 isa<ConstantFPSDNode>(N2)) {
14392 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14395 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14396 TargetLowering::NegatibleCost CostN0 =
14397 TargetLowering::NegatibleCost::Expensive;
14398 TargetLowering::NegatibleCost CostN1 =
14399 TargetLowering::NegatibleCost::Expensive;
14400 SDValue NegN0 =
14401 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14402 SDValue NegN1 =
14403 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14404 if (NegN0 && NegN1 &&
14405 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14406 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14407 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14409 if (UnsafeFPMath) {
14410 if (N0CFP && N0CFP->isZero())
14411 return N2;
14412 if (N1CFP && N1CFP->isZero())
14413 return N2;
14416 if (N0CFP && N0CFP->isExactlyValue(1.0))
14417 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14418 if (N1CFP && N1CFP->isExactlyValue(1.0))
14419 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14421 // Canonicalize (fma c, x, y) -> (fma x, c, y)
14422 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14423 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14424 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14426 if (UnsafeFPMath) {
14427 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14428 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14429 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14430 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14431 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14432 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14435 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14436 if (N0.getOpcode() == ISD::FMUL &&
14437 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14438 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14439 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14440 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14441 N2);
14445 // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
14446 if (N1CFP) {
14447 if (N1CFP->isExactlyValue(1.0))
14448 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14450 if (N1CFP->isExactlyValue(-1.0) &&
14451 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14452 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14453 AddToWorklist(RHSNeg.getNode());
14454 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14457 // fma (fneg x), K, y -> fma x, -K, y
14458 if (N0.getOpcode() == ISD::FNEG &&
14459 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14460 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14461 ForCodeSize)))) {
14462 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14463 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14467 if (UnsafeFPMath) {
14468 // (fma x, c, x) -> (fmul x, (c+1))
14469 if (N1CFP && N0 == N2) {
14470 return DAG.getNode(
14471 ISD::FMUL, DL, VT, N0,
14472 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14475 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14476 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14477 return DAG.getNode(
14478 ISD::FMUL, DL, VT, N0,
14479 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14483 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14484 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14485 if (!TLI.isFNegFree(VT))
14486 if (SDValue Neg = TLI.getCheaperNegatedExpression(
14487 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14488 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14489 return SDValue();
14492 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14493 // reciprocal.
14494 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14495 // Notice that this is not always beneficial. One reason is different targets
14496 // may have different costs for FDIV and FMUL, so sometimes the cost of two
14497 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14498 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
14499 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14500 // TODO: Limit this transform based on optsize/minsize - it always creates at
14501 // least 1 extra instruction. But the perf win may be substantial enough
14502 // that only minsize should restrict this.
14503 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14504 const SDNodeFlags Flags = N->getFlags();
14505 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14506 return SDValue();
14508 // Skip if current node is a reciprocal/fneg-reciprocal.
14509 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14510 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14511 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14512 return SDValue();
14514 // Exit early if the target does not want this transform or if there can't
14515 // possibly be enough uses of the divisor to make the transform worthwhile.
14516 unsigned MinUses = TLI.combineRepeatedFPDivisors();
14518 // For splat vectors, scale the number of uses by the splat factor. If we can
14519 // convert the division into a scalar op, that will likely be much faster.
14520 unsigned NumElts = 1;
14521 EVT VT = N->getValueType(0);
14522 if (VT.isVector() && DAG.isSplatValue(N1))
14523 NumElts = VT.getVectorNumElements();
14525 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14526 return SDValue();
14528 // Find all FDIV users of the same divisor.
14529 // Use a set because duplicates may be present in the user list.
14530 SetVector<SDNode *> Users;
14531 for (auto *U : N1->uses()) {
14532 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14533 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14534 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14535 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14536 U->getFlags().hasAllowReassociation() &&
14537 U->getFlags().hasNoSignedZeros())
14538 continue;
14540 // This division is eligible for optimization only if global unsafe math
14541 // is enabled or if this division allows reciprocal formation.
14542 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14543 Users.insert(U);
14547 // Now that we have the actual number of divisor uses, make sure it meets
14548 // the minimum threshold specified by the target.
14549 if ((Users.size() * NumElts) < MinUses)
14550 return SDValue();
14552 SDLoc DL(N);
14553 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14554 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14556 // Dividend / Divisor -> Dividend * Reciprocal
14557 for (auto *U : Users) {
14558 SDValue Dividend = U->getOperand(0);
14559 if (Dividend != FPOne) {
14560 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14561 Reciprocal, Flags);
14562 CombineTo(U, NewNode);
14563 } else if (U != Reciprocal.getNode()) {
14564 // In the absence of fast-math-flags, this user node is always the
14565 // same node as Reciprocal, but with FMF they may be different nodes.
14566 CombineTo(U, Reciprocal);
14569 return SDValue(N, 0); // N was replaced.
14572 SDValue DAGCombiner::visitFDIV(SDNode *N) {
14573 SDValue N0 = N->getOperand(0);
14574 SDValue N1 = N->getOperand(1);
14575 EVT VT = N->getValueType(0);
14576 SDLoc DL(N);
14577 const TargetOptions &Options = DAG.getTarget().Options;
14578 SDNodeFlags Flags = N->getFlags();
14579 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14581 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14582 return R;
14584 // fold (fdiv c1, c2) -> c1/c2
14585 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
14586 return C;
14588 // fold vector ops
14589 if (VT.isVector())
14590 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14591 return FoldedVOp;
14593 if (SDValue NewSel = foldBinOpIntoSelect(N))
14594 return NewSel;
14596 if (SDValue V = combineRepeatedFPDivisors(N))
14597 return V;
14599 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14600 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
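// (For example, X / 8.0 becomes X * 0.125 exactly, while X / 3.0 becomes a
// multiply by the rounded reciprocal, which is why the opInexact case is
// accepted below.)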
14601 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14602 // Compute the reciprocal 1.0 / c2.
14603 const APFloat &N1APF = N1CFP->getValueAPF();
14604 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14605 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14606 // Only do the transform if the reciprocal is a legal fp immediate that
14607 // isn't too nasty (e.g. NaN, denormal, ...).
14608 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14609 (!LegalOperations ||
14610 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14611 // backend)... we should handle this gracefully after Legalize.
14612 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14613 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14614 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14615 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14616 DAG.getConstantFP(Recip, DL, VT));
14619 // If this FDIV is part of a reciprocal square root, it may be folded
14620 // into a target-specific square root estimate instruction.
14621 if (N1.getOpcode() == ISD::FSQRT) {
14622 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14623 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14624 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14625 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14626 if (SDValue RV =
14627 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14628 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14629 AddToWorklist(RV.getNode());
14630 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14632 } else if (N1.getOpcode() == ISD::FP_ROUND &&
14633 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14634 if (SDValue RV =
14635 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14636 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14637 AddToWorklist(RV.getNode());
14638 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14640 } else if (N1.getOpcode() == ISD::FMUL) {
14641 // Look through an FMUL. Even though this won't remove the FDIV directly,
14642 // it's still worthwhile to get rid of the FSQRT if possible.
14643 SDValue Sqrt, Y;
14644 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14645 Sqrt = N1.getOperand(0);
14646 Y = N1.getOperand(1);
14647 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14648 Sqrt = N1.getOperand(1);
14649 Y = N1.getOperand(0);
14651 if (Sqrt.getNode()) {
14652 // If the other multiply operand is known positive, pull it into the
14653 // sqrt. That will eliminate the division if we convert to an estimate.
14654 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14655 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14656 SDValue A;
14657 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14658 A = Y.getOperand(0);
14659 else if (Y == Sqrt.getOperand(0))
14660 A = Y;
14661 if (A) {
14662 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14663 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14664 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14665 SDValue AAZ =
14666 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14667 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14668 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14670 // Estimate creation failed. Clean up speculatively created nodes.
14671 recursivelyDeleteUnusedNodes(AAZ.getNode());
14675 // We found an FSQRT, so try to make this fold:
14676 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14677 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14678 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14679 AddToWorklist(Div.getNode());
14680 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14685 // Fold into a reciprocal estimate and multiply instead of a real divide.
14686 if (Options.NoInfsFPMath || Flags.hasNoInfs())
14687 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14688 return RV;
14691 // Fold X/Sqrt(X) -> Sqrt(X)
14692 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14693 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14694 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14695 return N1;
14697 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14698 TargetLowering::NegatibleCost CostN0 =
14699 TargetLowering::NegatibleCost::Expensive;
14700 TargetLowering::NegatibleCost CostN1 =
14701 TargetLowering::NegatibleCost::Expensive;
14702 SDValue NegN0 =
14703 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14704 SDValue NegN1 =
14705 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14706 if (NegN0 && NegN1 &&
14707 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14708 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14709 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14711 return SDValue();
14714 SDValue DAGCombiner::visitFREM(SDNode *N) {
14715 SDValue N0 = N->getOperand(0);
14716 SDValue N1 = N->getOperand(1);
14717 EVT VT = N->getValueType(0);
14718 SDNodeFlags Flags = N->getFlags();
14719 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14721 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14722 return R;
14724 // fold (frem c1, c2) -> fmod(c1,c2)
14725 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
14726 return C;
14728 if (SDValue NewSel = foldBinOpIntoSelect(N))
14729 return NewSel;
14731 return SDValue();
14734 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14735 SDNodeFlags Flags = N->getFlags();
14736 const TargetOptions &Options = DAG.getTarget().Options;
14738 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14739 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14740 if (!Flags.hasApproximateFuncs() ||
14741 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14742 return SDValue();
14744 SDValue N0 = N->getOperand(0);
14745 if (TLI.isFsqrtCheap(N0, DAG))
14746 return SDValue();
14748 // FSQRT nodes have flags that propagate to the created nodes.
14749 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14750 // transform the fdiv, we may produce a sub-optimal estimate sequence
14751 // because the reciprocal calculation may not have to filter out a
14752 // 0.0 input.
14753 return buildSqrtEstimate(N0, Flags);
14756 /// copysign(x, fp_extend(y)) -> copysign(x, y)
14757 /// copysign(x, fp_round(y)) -> copysign(x, y)
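/// Only the sign bit of the second operand matters, and fpext/fpround leave
/// the sign unchanged, so the cast can usually be looked through (subject to
/// the type restrictions checked below).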
14758 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14759 SDValue N1 = N->getOperand(1);
14760 if ((N1.getOpcode() == ISD::FP_EXTEND ||
14761 N1.getOpcode() == ISD::FP_ROUND)) {
14762 EVT N1VT = N1->getValueType(0);
14763 EVT N1Op0VT = N1->getOperand(0).getValueType();
14765 // Always fold no-op FP casts.
14766 if (N1VT == N1Op0VT)
14767 return true;
14769 // Do not optimize out type conversion of f128 type yet.
14770 // For some targets like x86_64, configuration is changed to keep one f128
14771 // value in one SSE register, but instruction selection cannot handle
14772 // FCOPYSIGN on SSE registers yet.
14773 if (N1Op0VT == MVT::f128)
14774 return false;
14776 // Avoid mismatched vector operand types, for better instruction selection.
14777 if (N1Op0VT.isVector())
14778 return false;
14780 return true;
14782 return false;
14785 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14786 SDValue N0 = N->getOperand(0);
14787 SDValue N1 = N->getOperand(1);
14788 EVT VT = N->getValueType(0);
14790 // constant-fold (fcopysign c1, c2)
14791 if (SDValue C =
14792 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
14793 return C;
14795 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14796 const APFloat &V = N1C->getValueAPF();
14797 // copysign(x, c1) -> fabs(x) iff ispos(c1)
14798 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14799 if (!V.isNegative()) {
14800 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14801 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14802 } else {
14803 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14804 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14805 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14809 // copysign(fabs(x), y) -> copysign(x, y)
14810 // copysign(fneg(x), y) -> copysign(x, y)
14811 // copysign(copysign(x,z), y) -> copysign(x, y)
14812 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14813 N0.getOpcode() == ISD::FCOPYSIGN)
14814 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14816 // copysign(x, abs(y)) -> abs(x)
14817 if (N1.getOpcode() == ISD::FABS)
14818 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14820 // copysign(x, copysign(y,z)) -> copysign(x, z)
14821 if (N1.getOpcode() == ISD::FCOPYSIGN)
14822 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14824 // copysign(x, fp_extend(y)) -> copysign(x, y)
14825 // copysign(x, fp_round(y)) -> copysign(x, y)
14826 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14827 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14829 return SDValue();
14832 SDValue DAGCombiner::visitFPOW(SDNode *N) {
14833 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14834 if (!ExponentC)
14835 return SDValue();
14836 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14838 // Try to convert x ** (1/3) into cube root.
14839 // TODO: Handle the various flavors of long double.
14840 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14841 // Some range near 1/3 should be fine.
14842 EVT VT = N->getValueType(0);
14843 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14844 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14845 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14846 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14847 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
14848 // For regular numbers, rounding may cause the results to differ.
14849 // Therefore, we require { nsz ninf nnan afn } for this transform.
14850 // TODO: We could select out the special cases if we don't have nsz/ninf.
14851 SDNodeFlags Flags = N->getFlags();
14852 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14853 !Flags.hasApproximateFuncs())
14854 return SDValue();
14856 // Do not create a cbrt() libcall if the target does not have it, and do not
14857 // turn a pow that has lowering support into a cbrt() libcall.
14858 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14859 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14860 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14861 return SDValue();
14863 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14866 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14867 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14868 // TODO: This could be extended (using a target hook) to handle smaller
14869 // power-of-2 fractional exponents.
14870 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14871 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14872 if (ExponentIs025 || ExponentIs075) {
14873 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14874 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
14875 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14876 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
14877 // For regular numbers, rounding may cause the results to differ.
14878 // Therefore, we require { nsz ninf afn } for this transform.
14879 // TODO: We could select out the special cases if we don't have nsz/ninf.
14880 SDNodeFlags Flags = N->getFlags();
14882 // We only need no signed zeros for the 0.25 case.
14883 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14884 !Flags.hasApproximateFuncs())
14885 return SDValue();
14887 // Don't double the number of libcalls. We are trying to inline fast code.
14888 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14889 return SDValue();
14891 // Assume that libcalls are the smallest code.
14892 // TODO: This restriction should probably be lifted for vectors.
14893 if (ForCodeSize)
14894 return SDValue();
14896 // pow(X, 0.25) --> sqrt(sqrt(X))
14897 SDLoc DL(N);
14898 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14899 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14900 if (ExponentIs025)
14901 return SqrtSqrt;
14902 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14903 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14906 return SDValue();
14909 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14910 const TargetLowering &TLI) {
14911 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14912 // replacing casts with a libcall. We also must be allowed to ignore -0.0
14913 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14914 // conversions would return +0.0.
14915 // FIXME: We should be able to use node-level FMF here.
14916 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14917 EVT VT = N->getValueType(0);
14918 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14919 !DAG.getTarget().Options.NoSignedZerosFPMath)
14920 return SDValue();
14922 // fptosi/fptoui round towards zero, so converting from FP to integer and
14923 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
14924 SDValue N0 = N->getOperand(0);
14925 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14926 N0.getOperand(0).getValueType() == VT)
14927 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14929 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14930 N0.getOperand(0).getValueType() == VT)
14931 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14933 return SDValue();
14936 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14937 SDValue N0 = N->getOperand(0);
14938 EVT VT = N->getValueType(0);
14939 EVT OpVT = N0.getValueType();
14941 // [us]itofp(undef) = 0, because the result value is bounded.
14942 if (N0.isUndef())
14943 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14945 // fold (sint_to_fp c1) -> c1fp
14946 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14947 // ...but only if the target supports immediate floating-point values
14948 (!LegalOperations ||
14949 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14950 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14952 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14953 // but UINT_TO_FP is legal on this target, try to convert.
14954 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14955 hasOperation(ISD::UINT_TO_FP, OpVT)) {
14956 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14957 if (DAG.SignBitIsZero(N0))
14958 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14961 // The next optimizations are desirable only if SELECT_CC can be lowered.
14962 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
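// (The i1 setcc result is treated as a signed value, so 'true' == 1 is
// interpreted as -1 and converts to -1.0.)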
14963 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14964 !VT.isVector() &&
14965 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14966 SDLoc DL(N);
14967 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14968 DAG.getConstantFP(0.0, DL, VT));
14971 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14972 // (select (setcc x, y, cc), 1.0, 0.0)
14973 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14974 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14975 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14976 SDLoc DL(N);
14977 return DAG.getSelect(DL, VT, N0.getOperand(0),
14978 DAG.getConstantFP(1.0, DL, VT),
14979 DAG.getConstantFP(0.0, DL, VT));
14982 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14983 return FTrunc;
14985 return SDValue();
14988 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14989 SDValue N0 = N->getOperand(0);
14990 EVT VT = N->getValueType(0);
14991 EVT OpVT = N0.getValueType();
14993 // [us]itofp(undef) = 0, because the result value is bounded.
14994 if (N0.isUndef())
14995 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14997 // fold (uint_to_fp c1) -> c1fp
14998 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14999 // ...but only if the target supports immediate floating-point values
15000 (!LegalOperations ||
15001 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15002 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15004 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15005 // but SINT_TO_FP is legal on this target, try to convert.
15006 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
15007 hasOperation(ISD::SINT_TO_FP, OpVT)) {
15008 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15009 if (DAG.SignBitIsZero(N0))
15010 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15013 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15014 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15015 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15016 SDLoc DL(N);
15017 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15018 DAG.getConstantFP(0.0, DL, VT));
15021 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15022 return FTrunc;
15024 return SDValue();
15027 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
15028 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
15029 SDValue N0 = N->getOperand(0);
15030 EVT VT = N->getValueType(0);
15032 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15033 return SDValue();
15035 SDValue Src = N0.getOperand(0);
15036 EVT SrcVT = Src.getValueType();
15037 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
15038 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15040 // We can safely assume the conversion won't overflow the output range,
15041 // because (for example) (uint8_t)18293.f is undefined behavior.
15043 // Since we can assume the conversion won't overflow, our decision as to
15044 // whether the input will fit in the float should depend on the minimum
15045 // of the input range and output range.
15047 // This means this is also safe for a signed input and unsigned output, since
15048 // a negative input would lead to undefined behavior.
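// For example, (fp_to_sint i32 (sint_to_fp f32 (i16 X))) has 15 input
// magnitude bits, 31 output magnitude bits, and f32 offers 24 bits of
// precision, so every reachable value round-trips exactly and the pair
// folds to (sign_extend i32 X).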
15049 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15050 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
15051 unsigned ActualSize = std::min(InputSize, OutputSize);
15052 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
15054 // We can only fold away the float conversion if the input range can be
15055 // represented exactly in the float range.
15056 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
15057 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15058 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
15059 : ISD::ZERO_EXTEND;
15060 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15062 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15063 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
15064 return DAG.getBitcast(VT, Src);
15066 return SDValue();
15069 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15070 SDValue N0 = N->getOperand(0);
15071 EVT VT = N->getValueType(0);
15073 // fold (fp_to_sint undef) -> undef
15074 if (N0.isUndef())
15075 return DAG.getUNDEF(VT);
15077 // fold (fp_to_sint c1fp) -> c1
15078 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15079 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15081 return FoldIntToFPToInt(N, DAG);
15084 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15085 SDValue N0 = N->getOperand(0);
15086 EVT VT = N->getValueType(0);
15088 // fold (fp_to_uint undef) -> undef
15089 if (N0.isUndef())
15090 return DAG.getUNDEF(VT);
15092 // fold (fp_to_uint c1fp) -> c1
15093 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15094 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15096 return FoldIntToFPToInt(N, DAG);
15099 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
15100 SDValue N0 = N->getOperand(0);
15101 SDValue N1 = N->getOperand(1);
15102 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15103 EVT VT = N->getValueType(0);
15105 // fold (fp_round c1fp) -> c1fp
15106 if (N0CFP)
15107 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
15109 // fold (fp_round (fp_extend x)) -> x
15110 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
15111 return N0.getOperand(0);
15113 // fold (fp_round (fp_round x)) -> (fp_round x)
15114 if (N0.getOpcode() == ISD::FP_ROUND) {
15115 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
15116 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
15118 // Skip this folding if it results in an fp_round from f80 to f16.
15120 // f80 to f16 always generates an expensive (and as yet, unimplemented)
15121 // libcall to __truncxfhf2 instead of selecting native f16 conversion
15122 // instructions from f32 or f64. Moreover, the first (value-preserving)
15123 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
15124 // x86.
15125 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
15126 return SDValue();
15128 // If the first fp_round isn't a value preserving truncation, it might
15129 // introduce a tie in the second fp_round, that wouldn't occur in the
15130 // single-step fp_round we want to fold to.
15131 // In other words, double rounding isn't the same as rounding.
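// (Classic double-rounding case: if the first, narrower rounding lands
// exactly on a halfway point of the final type, the second rounding can end
// up one ulp away from what a single direct rounding would have produced.)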
15132 // Also, this is a value preserving truncation iff both fp_round's are.
15133 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
15134 SDLoc DL(N);
15135 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
15136 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
15140 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
15141 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
15142 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
15143 N0.getOperand(0), N1);
15144 AddToWorklist(Tmp.getNode());
15145 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
15146 Tmp, N0.getOperand(1));
15149 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15150 return NewVSel;
15152 return SDValue();
15155 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15156 SDValue N0 = N->getOperand(0);
15157 EVT VT = N->getValueType(0);
15159 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
15160 if (N->hasOneUse() &&
15161 N->use_begin()->getOpcode() == ISD::FP_ROUND)
15162 return SDValue();
15164 // fold (fp_extend c1fp) -> c1fp
15165 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15166 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15168 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15169 if (N0.getOpcode() == ISD::FP16_TO_FP &&
15170 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
15171 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15173 // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
15174 // value of X.
15175 if (N0.getOpcode() == ISD::FP_ROUND
15176 && N0.getConstantOperandVal(1) == 1) {
15177 SDValue In = N0.getOperand(0);
15178 if (In.getValueType() == VT) return In;
15179 if (VT.bitsLT(In.getValueType()))
15180 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15181 In, N0.getOperand(1));
15182 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15185 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
15186 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15187 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
15188 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15189 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
15190 LN0->getChain(),
15191 LN0->getBasePtr(), N0.getValueType(),
15192 LN0->getMemOperand());
15193 CombineTo(N, ExtLoad);
15194 CombineTo(N0.getNode(),
15195 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
15196 N0.getValueType(), ExtLoad,
15197 DAG.getIntPtrConstant(1, SDLoc(N0))),
15198 ExtLoad.getValue(1));
15199 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15202 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15203 return NewVSel;
15205 return SDValue();
15208 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15209 SDValue N0 = N->getOperand(0);
15210 EVT VT = N->getValueType(0);
15212 // fold (fceil c1) -> fceil(c1)
15213 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15214 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
15216 return SDValue();
15219 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15220 SDValue N0 = N->getOperand(0);
15221 EVT VT = N->getValueType(0);
15223 // fold (ftrunc c1) -> ftrunc(c1)
15224 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15225 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15227 // fold ftrunc (known rounded int x) -> x
15228 // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
15229 // likely to be generated to extract an integer from a rounded floating-point value.
15230 switch (N0.getOpcode()) {
15231 default: break;
15232 case ISD::FRINT:
15233 case ISD::FTRUNC:
15234 case ISD::FNEARBYINT:
15235 case ISD::FFLOOR:
15236 case ISD::FCEIL:
15237 return N0;
15240 return SDValue();
15243 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15244 SDValue N0 = N->getOperand(0);
15245 EVT VT = N->getValueType(0);
15247 // fold (ffloor c1) -> ffloor(c1)
15248 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15249 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
15251 return SDValue();
15254 SDValue DAGCombiner::visitFNEG(SDNode *N) {
15255 SDValue N0 = N->getOperand(0);
15256 EVT VT = N->getValueType(0);
15257 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15259 // Constant fold FNEG.
15260 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15261 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
15263 if (SDValue NegN0 =
15264 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15265 return NegN0;
15267 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15268 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15269 // know it was called from a context with a nsz flag if the input fsub does
15270 // not.
15271 if (N0.getOpcode() == ISD::FSUB &&
15272 (DAG.getTarget().Options.NoSignedZerosFPMath ||
15273 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15274 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15275 N0.getOperand(0));
15278 if (SDValue Cast = foldSignChangeInBitcast(N))
15279 return Cast;
15281 return SDValue();
15284 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15285 SDValue N0 = N->getOperand(0);
15286 SDValue N1 = N->getOperand(1);
15287 EVT VT = N->getValueType(0);
15288 const SDNodeFlags Flags = N->getFlags();
15289 unsigned Opc = N->getOpcode();
15290 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15291 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15292 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15294 // Constant fold.
15295 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15296 return C;
15298 // Canonicalize to constant on RHS.
15299 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15300 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15301 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15303 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
15304 const APFloat &AF = N1CFP->getValueAPF();
15306 // minnum(X, nan) -> X
15307 // maxnum(X, nan) -> X
15308 // minimum(X, nan) -> nan
15309 // maximum(X, nan) -> nan
15310 if (AF.isNaN())
15311 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15313 // In the following folds, inf can be replaced with the largest finite
15314 // float, if the ninf flag is set.
15315 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15316 // minnum(X, -inf) -> -inf
15317 // maxnum(X, +inf) -> +inf
15318 // minimum(X, -inf) -> -inf if nnan
15319 // maximum(X, +inf) -> +inf if nnan
15320 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15321 return N->getOperand(1);
15323 // minnum(X, +inf) -> X if nnan
15324 // maxnum(X, -inf) -> X if nnan
15325 // minimum(X, +inf) -> X
15326 // maximum(X, -inf) -> X
15327 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15328 return N->getOperand(0);
15332 return SDValue();
15335 SDValue DAGCombiner::visitFABS(SDNode *N) {
15336 SDValue N0 = N->getOperand(0);
15337 EVT VT = N->getValueType(0);
15339 // fold (fabs c1) -> fabs(c1)
15340 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15341 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15343 // fold (fabs (fabs x)) -> (fabs x)
15344 if (N0.getOpcode() == ISD::FABS)
15345 return N->getOperand(0);
15347 // fold (fabs (fneg x)) -> (fabs x)
15348 // fold (fabs (fcopysign x, y)) -> (fabs x)
15349 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15350 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15352 if (SDValue Cast = foldSignChangeInBitcast(N))
15353 return Cast;
15355 return SDValue();
15358 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15359 SDValue Chain = N->getOperand(0);
15360 SDValue N1 = N->getOperand(1);
15361 SDValue N2 = N->getOperand(2);
15363 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15364 // nondeterministic jumps).
15365 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15366 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15367 N1->getOperand(0), N2);
15370 // If N is a constant we could fold this into a fallthrough or unconditional
15371 // branch. However that doesn't happen very often in normal code, because
15372 // Instcombine/SimplifyCFG should have handled the available opportunities.
15373 // If we did this folding here, it would be necessary to update the
15374 // MachineBasicBlock CFG, which is awkward.
15376 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15377 // on the target.
15378 if (N1.getOpcode() == ISD::SETCC &&
15379 TLI.isOperationLegalOrCustom(ISD::BR_CC,
15380 N1.getOperand(0).getValueType())) {
15381 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15382 Chain, N1.getOperand(2),
15383 N1.getOperand(0), N1.getOperand(1), N2);
15386 if (N1.hasOneUse()) {
15387 // rebuildSetCC calls visitXor which may change the Chain when there is a
15388 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15389 HandleSDNode ChainHandle(Chain);
15390 if (SDValue NewN1 = rebuildSetCC(N1))
15391 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15392 ChainHandle.getValue(), NewN1, N2);
15395 return SDValue();
15398 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15399 if (N.getOpcode() == ISD::SRL ||
15400 (N.getOpcode() == ISD::TRUNCATE &&
15401 (N.getOperand(0).hasOneUse() &&
15402 N.getOperand(0).getOpcode() == ISD::SRL))) {
15403 // Look past the truncate.
15404 if (N.getOpcode() == ISD::TRUNCATE)
15405 N = N.getOperand(0);
15407 // Match this pattern so that we can generate simpler code:
15409 // %a = ...
15410 // %b = and i32 %a, 2
15411 // %c = srl i32 %b, 1
15412 // brcond i32 %c ...
15414 // into
15416 // %a = ...
15417 // %b = and i32 %a, 2
15418 // %c = setcc eq %b, 0
15419 // brcond %c ...
15421 // This applies only when the AND constant value has one bit set and the
15422 // SRL constant is equal to the log2 of the AND constant. The back-end is
15423 // smart enough to convert the result into a TEST/JMP sequence.
15424 SDValue Op0 = N.getOperand(0);
15425 SDValue Op1 = N.getOperand(1);
15427 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15428 SDValue AndOp1 = Op0.getOperand(1);
15430 if (AndOp1.getOpcode() == ISD::Constant) {
15431 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15433 if (AndConst.isPowerOf2() &&
15434 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15435 SDLoc DL(N);
15436 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15437 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15438 ISD::SETNE);
15444 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15445 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15446 if (N.getOpcode() == ISD::XOR) {
15447 // Because we may call this on a speculatively constructed
15448 // SimplifiedSetCC Node, we need to simplify this node first.
15449 // Ideally this should be folded into SimplifySetCC and not
15450 // here. For now, grab a handle to N so we don't lose it from
15451 // replacements internal to the visit.
15452 HandleSDNode XORHandle(N);
15453 while (N.getOpcode() == ISD::XOR) {
15454 SDValue Tmp = visitXOR(N.getNode());
15455 // No simplification done.
15456 if (!Tmp.getNode())
15457 break;
15458 // Returning N is a form of in-visit replacement that may invalidate
15459 // N. Grab the value from the handle.
15460 if (Tmp.getNode() == N.getNode())
15461 N = XORHandle.getValue();
15462 else // Node simplified. Try simplifying again.
15463 N = Tmp;
15466 if (N.getOpcode() != ISD::XOR)
15467 return N;
15469 SDValue Op0 = N->getOperand(0);
15470 SDValue Op1 = N->getOperand(1);
15472 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15473 bool Equal = false;
15474 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15475 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15476 Op0.getValueType() == MVT::i1) {
15477 N = Op0;
15478 Op0 = N->getOperand(0);
15479 Op1 = N->getOperand(1);
15480 Equal = true;
15483 EVT SetCCVT = N.getValueType();
15484 if (LegalTypes)
15485 SetCCVT = getSetCCResultType(SetCCVT);
15486 // Replace the uses of XOR with SETCC
15487 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15488 Equal ? ISD::SETEQ : ISD::SETNE);
15492 return SDValue();
15495 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15497 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15498 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15499 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15501 // If N is a constant we could fold this into a fallthrough or unconditional
15502 // branch. However that doesn't happen very often in normal code, because
15503 // Instcombine/SimplifyCFG should have handled the available opportunities.
15504 // If we did this folding here, it would be necessary to update the
15505 // MachineBasicBlock CFG, which is awkward.
15507 // Use SimplifySetCC to simplify SETCC's.
15508 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15509 CondLHS, CondRHS, CC->get(), SDLoc(N),
15510 false);
15511 if (Simp.getNode()) AddToWorklist(Simp.getNode());
15513 // fold to a simpler setcc
15514 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15515 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15516 N->getOperand(0), Simp.getOperand(2),
15517 Simp.getOperand(0), Simp.getOperand(1),
15518 N->getOperand(4));
15520 return SDValue();
15523 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15524 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15525 const TargetLowering &TLI) {
15526 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15527 if (LD->isIndexed())
15528 return false;
15529 EVT VT = LD->getMemoryVT();
15530 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15531 return false;
15532 Ptr = LD->getBasePtr();
15533 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15534 if (ST->isIndexed())
15535 return false;
15536 EVT VT = ST->getMemoryVT();
15537 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15538 return false;
15539 Ptr = ST->getBasePtr();
15540 IsLoad = false;
15541 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15542 if (LD->isIndexed())
15543 return false;
15544 EVT VT = LD->getMemoryVT();
15545 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15546 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15547 return false;
15548 Ptr = LD->getBasePtr();
15549 IsMasked = true;
15550 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15551 if (ST->isIndexed())
15552 return false;
15553 EVT VT = ST->getMemoryVT();
15554 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15555 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15556 return false;
15557 Ptr = ST->getBasePtr();
15558 IsLoad = false;
15559 IsMasked = true;
15560 } else {
15561 return false;
15563 return true;
15566 /// Try turning a load/store into a pre-indexed load/store when the base
15567 /// pointer is an add or subtract and it has other uses besides the load/store.
15568 /// After the transformation, the new indexed load/store has effectively folded
15569 /// the add/subtract in and all of its other uses are redirected to the
15570 /// new load/store.
15571 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15572 if (Level < AfterLegalizeDAG)
15573 return false;
15575 bool IsLoad = true;
15576 bool IsMasked = false;
15577 SDValue Ptr;
15578 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15579 Ptr, TLI))
15580 return false;
15582 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15583 // out. There is no reason to make this a preinc/predec.
15584 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15585 Ptr.getNode()->hasOneUse())
15586 return false;
15588 // Ask the target to do addressing mode selection.
15589 SDValue BasePtr;
15590 SDValue Offset;
15591 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15592 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15593 return false;
15595 // Backends without true r+i pre-indexed forms may need to pass a
15596 // constant base with a variable offset so that constant coercion
15597 // will work with the patterns in canonical form.
15598 bool Swapped = false;
15599 if (isa<ConstantSDNode>(BasePtr)) {
15600 std::swap(BasePtr, Offset);
15601 Swapped = true;
15604 // Don't create an indexed load / store with zero offset.
15605 if (isNullConstant(Offset))
15606 return false;
15608 // Try turning it into a pre-indexed load / store except when:
15609 // 1) The new base ptr is a frame index.
15610 // 2) If N is a store and the new base ptr is either the same as or is a
15611 // predecessor of the value being stored.
15612 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15613 // that would create a cycle.
15614 // 4) All uses are load / store ops that use it as old base ptr.
15616 // Check #1. Preinc'ing a frame index would require copying the stack pointer
15617 // (plus the implicit offset) to a register to preinc anyway.
15618 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15619 return false;
15621 // Check #2.
15622 if (!IsLoad) {
15623 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15624 : cast<StoreSDNode>(N)->getValue();
15626 // Would require a copy.
15627 if (Val == BasePtr)
15628 return false;
15630 // Would create a cycle.
15631 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15632 return false;
15635 // Caches for hasPredecessorHelper.
15636 SmallPtrSet<const SDNode *, 32> Visited;
15637 SmallVector<const SDNode *, 16> Worklist;
15638 Worklist.push_back(N);
15640 // If the offset is a constant, there may be other adds of constants that
15641 // can be folded with this one. We should do this to avoid having to keep
15642 // a copy of the original base pointer.
15643 SmallVector<SDNode *, 16> OtherUses;
15644 if (isa<ConstantSDNode>(Offset))
15645 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15646 UE = BasePtr.getNode()->use_end();
15647 UI != UE; ++UI) {
15648 SDUse &Use = UI.getUse();
15649 // Skip the use that is Ptr and uses of other results from BasePtr's
15650 // node (important for nodes that return multiple results).
15651 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15652 continue;
15654 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15655 continue;
15657 if (Use.getUser()->getOpcode() != ISD::ADD &&
15658 Use.getUser()->getOpcode() != ISD::SUB) {
15659 OtherUses.clear();
15660 break;
15663 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15664 if (!isa<ConstantSDNode>(Op1)) {
15665 OtherUses.clear();
15666 break;
15669 // FIXME: In some cases, we can be smarter about this.
15670 if (Op1.getValueType() != Offset.getValueType()) {
15671 OtherUses.clear();
15672 break;
15675 OtherUses.push_back(Use.getUser());
15678 if (Swapped)
15679 std::swap(BasePtr, Offset);
15681 // Now check for #3 and #4.
15682 bool RealUse = false;
15684 for (SDNode *Use : Ptr.getNode()->uses()) {
15685 if (Use == N)
15686 continue;
15687 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15688 return false;
15690 // If Ptr may be folded into the addressing mode of another use, then it's
15691 // not profitable to do this transformation.
15692 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15693 RealUse = true;
15696 if (!RealUse)
15697 return false;
15699 SDValue Result;
15700 if (!IsMasked) {
15701 if (IsLoad)
15702 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15703 else
15704 Result =
15705 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15706 } else {
15707 if (IsLoad)
15708 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15709 Offset, AM);
15710 else
15711 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15712 Offset, AM);
15714 ++PreIndexedNodes;
15715 ++NodesCombined;
15716 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15717 Result.getNode()->dump(&DAG); dbgs() << '\n');
15718 WorklistRemover DeadNodes(*this);
15719 if (IsLoad) {
15720 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15721 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15722 } else {
15723 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15726 // Finally, since the node is now dead, remove it from the graph.
15727 deleteAndRecombine(N);
15729 if (Swapped)
15730 std::swap(BasePtr, Offset);
15732 // Replace other uses of BasePtr that can be updated to use Ptr
15733 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15734 unsigned OffsetIdx = 1;
15735 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15736 OffsetIdx = 0;
15737 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15738 BasePtr.getNode() && "Expected BasePtr operand");
15740 // We need to replace ptr0 in the following expression:
15741 // x0 * offset0 + y0 * ptr0 = t0
15742 // knowing that
15743 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15745 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15746 // indexed load/store and the expression that needs to be re-written.
15748 // Therefore, we have:
15749 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
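// For instance, with a pre-increment form (x1 = y1 = 1) whose offset1 is 4
// and another user computing (add ptr0, 8) (x0 = y0 = 1), this rewrites the
// other user to (add t1, 4), since t0 = ptr0 + 8 = (t1 - 4) + 8 = t1 + 4.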
15751 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15752 const APInt &Offset0 = CN->getAPIntValue();
15753 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15754 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15755 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15756 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15757 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15759 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15761 APInt CNV = Offset0;
15762 if (X0 < 0) CNV = -CNV;
15763 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15764 else CNV = CNV - Offset1;
15766 SDLoc DL(OtherUses[i]);
15768 // We can now generate the new expression.
15769 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15770 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15772 SDValue NewUse = DAG.getNode(Opcode,
15773 DL,
15774 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15775 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15776 deleteAndRecombine(OtherUses[i]);
15779 // Replace the uses of Ptr with uses of the updated base value.
15780 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15781 deleteAndRecombine(Ptr.getNode());
15782 AddToWorklist(Result.getNode());
15784 return true;
15787 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15788 SDValue &BasePtr, SDValue &Offset,
15789 ISD::MemIndexedMode &AM,
15790 SelectionDAG &DAG,
15791 const TargetLowering &TLI) {
15792 if (PtrUse == N ||
15793 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15794 return false;
15796 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15797 return false;
15799 // Don't create an indexed load / store with zero offset.
15800 if (isNullConstant(Offset))
15801 return false;
15803 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15804 return false;
15806 SmallPtrSet<const SDNode *, 32> Visited;
15807 for (SDNode *Use : BasePtr.getNode()->uses()) {
15808 if (Use == Ptr.getNode())
15809 continue;
15811 // Bail out if there's a later user which could perform the indexing instead.
15812 if (isa<MemSDNode>(Use)) {
15813 bool IsLoad = true;
15814 bool IsMasked = false;
15815 SDValue OtherPtr;
15816 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15817 IsMasked, OtherPtr, TLI)) {
15818 SmallVector<const SDNode *, 2> Worklist;
15819 Worklist.push_back(Use);
15820 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15821 return false;
15825 // If all the uses are load / store addresses, then don't do the
15826 // transformation.
15827 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15828 for (SDNode *UseUse : Use->uses())
15829 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15830 return false;
15833 return true;
15836 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15837 bool &IsMasked, SDValue &Ptr,
15838 SDValue &BasePtr, SDValue &Offset,
15839 ISD::MemIndexedMode &AM,
15840 SelectionDAG &DAG,
15841 const TargetLowering &TLI) {
15842 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15843 IsMasked, Ptr, TLI) ||
15844 Ptr.getNode()->hasOneUse())
15845 return nullptr;
15847 // Try turning it into a post-indexed load / store except when
15848 // 1) All uses are load / store ops that use it as base ptr (and
15849 // it may be folded into the addressing mode).
15850 // 2) Op must be independent of N, i.e. Op is neither a predecessor
15851 // nor a successor of N. Otherwise, if Op is folded that would
15852 // create a cycle.
15853 for (SDNode *Op : Ptr->uses()) {
15854 // Check for #1.
15855 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15856 continue;
15858 // Check for #2.
15859 SmallPtrSet<const SDNode *, 32> Visited;
15860 SmallVector<const SDNode *, 8> Worklist;
15861 // Ptr is predecessor to both N and Op.
15862 Visited.insert(Ptr.getNode());
15863 Worklist.push_back(N);
15864 Worklist.push_back(Op);
15865 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15866 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15867 return Op;
15869 return nullptr;
15872 /// Try to combine a load/store with an add/sub of the base pointer node into a
15873 /// post-indexed load/store. The transformation effectively folds the
15874 /// add/subtract into the new indexed load/store, and all of its uses are
15875 /// redirected to the new load/store.
15876 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15877 if (Level < AfterLegalizeDAG)
15878 return false;
15880 bool IsLoad = true;
15881 bool IsMasked = false;
15882 SDValue Ptr;
15883 SDValue BasePtr;
15884 SDValue Offset;
15885 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15886 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15887 Offset, AM, DAG, TLI);
15888 if (!Op)
15889 return false;
15891 SDValue Result;
15892 if (!IsMasked)
15893 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15894 Offset, AM)
15895 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15896 BasePtr, Offset, AM);
15897 else
15898 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15899 BasePtr, Offset, AM)
15900 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15901 BasePtr, Offset, AM);
15902 ++PostIndexedNodes;
15903 ++NodesCombined;
15904 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15905 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15906 dbgs() << '\n');
15907 WorklistRemover DeadNodes(*this);
15908 if (IsLoad) {
15909 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15910 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15911 } else {
15912 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15915 // Finally, since the node is now dead, remove it from the graph.
15916 deleteAndRecombine(N);
15918 // Replace the uses of Op with uses of the updated base value.
15919 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15920 Result.getValue(IsLoad ? 1 : 0));
15921 deleteAndRecombine(Op);
15922 return true;
15925 /// Return the base-pointer arithmetic from an indexed \p LD.
15926 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15927 ISD::MemIndexedMode AM = LD->getAddressingMode();
15928 assert(AM != ISD::UNINDEXED);
15929 SDValue BP = LD->getOperand(1);
15930 SDValue Inc = LD->getOperand(2);
15932 // Some backends use TargetConstants for load offsets, but don't expect
15933 // TargetConstants in general ADD nodes. We can convert these constants into
15934 // regular Constants (if the constant is not opaque).
15935 assert((Inc.getOpcode() != ISD::TargetConstant ||
15936 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15937 "Cannot split out indexing using opaque target constants");
15938 if (Inc.getOpcode() == ISD::TargetConstant) {
15939 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15940 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15941 ConstInc->getValueType(0));
15944 unsigned Opc =
15945 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15946 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15949 static inline ElementCount numVectorEltsOrZero(EVT T) {
15950 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15953 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15954 Val = ST->getValue();
15955 EVT STType = Val.getValueType();
15956 EVT STMemType = ST->getMemoryVT();
15957 if (STType == STMemType)
15958 return true;
15959 if (isTypeLegal(STMemType))
15960 return false; // fail.
15961 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15962 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15963 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15964 return true;
15966 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15967 STType.isInteger() && STMemType.isInteger()) {
15968 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15969 return true;
15971 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15972 Val = DAG.getBitcast(STMemType, Val);
15973 return true;
15975 return false; // fail.
15978 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15979 EVT LDMemType = LD->getMemoryVT();
15980 EVT LDType = LD->getValueType(0);
15981 assert(Val.getValueType() == LDMemType &&
15982 "Attempting to extend value of non-matching type");
15983 if (LDType == LDMemType)
15984 return true;
15985 if (LDMemType.isInteger() && LDType.isInteger()) {
15986 switch (LD->getExtensionType()) {
15987 case ISD::NON_EXTLOAD:
15988 Val = DAG.getBitcast(LDType, Val);
15989 return true;
15990 case ISD::EXTLOAD:
15991 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15992 return true;
15993 case ISD::SEXTLOAD:
15994 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15995 return true;
15996 case ISD::ZEXTLOAD:
15997 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15998 return true;
16001 return false;
16004 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
16005 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
16006 return SDValue();
16007 SDValue Chain = LD->getOperand(0);
16008 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
16009 // TODO: Relax this restriction for unordered atomics (see D66309)
16010 if (!ST || !ST->isSimple())
16011 return SDValue();
16013 EVT LDType = LD->getValueType(0);
16014 EVT LDMemType = LD->getMemoryVT();
16015 EVT STMemType = ST->getMemoryVT();
16016 EVT STType = ST->getValue().getValueType();
16018 // There are two cases to consider here:
16019 // 1. The store is fixed width and the load is scalable. In this case we
16020 // don't know at compile time if the store completely envelops the load
16021 // so we abandon the optimisation.
16022 // 2. The store is scalable and the load is fixed width. We could
16023 // potentially support a limited number of cases here, but there has been
16024 // no cost-benefit analysis to prove it's worth it.
16025 bool LdStScalable = LDMemType.isScalableVector();
16026 if (LdStScalable != STMemType.isScalableVector())
16027 return SDValue();
16029 // If we are dealing with scalable vectors on a big endian platform the
16030 // calculation of offsets below becomes trickier, since we do not know at
16031 // compile time the absolute size of the vector. Until we've done more
16032 // analysis on big-endian platforms it seems better to bail out for now.
16033 if (LdStScalable && DAG.getDataLayout().isBigEndian())
16034 return SDValue();
16036 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
16037 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
16038 int64_t Offset;
16039 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
16040 return SDValue();
16042 // Normalize for endianness. After this, Offset=0 will denote that the least
16043 // significant bit in the loaded value maps to the least significant bit in
16044 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
16045 // n-th least significant byte of the stored value.
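// For example, a 32-bit store followed by a 16-bit load 2 bytes into it on a
// big-endian target has a raw offset of 2, which normalizes to
// (32 - 16) / 8 - 2 == 0, i.e. the load reads the least significant half of
// the stored value.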
16046 if (DAG.getDataLayout().isBigEndian())
16047 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
16048 (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
16049 8 -
16050 Offset;
16052 // Check that the stored value covers all bits that are loaded.
16053 bool STCoversLD;
16055 TypeSize LdMemSize = LDMemType.getSizeInBits();
16056 TypeSize StMemSize = STMemType.getSizeInBits();
16057 if (LdStScalable)
16058 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
16059 else
16060 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
16061 StMemSize.getFixedSize());
16063 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
16064 if (LD->isIndexed()) {
16065 // Cannot handle opaque target constants and we must respect the user's
16066 // request not to split indexes from loads.
16067 if (!canSplitIdx(LD))
16068 return SDValue();
16069 SDValue Idx = SplitIndexingFromLoad(LD);
16070 SDValue Ops[] = {Val, Idx, Chain};
16071 return CombineTo(LD, Ops, 3);
16073 return CombineTo(LD, Val, Chain);
16076 if (!STCoversLD)
16077 return SDValue();
16079 // Memory as copy space (potentially masked).
16080 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
16081 // Simple case: Direct non-truncating forwarding
16082 if (LDType.getSizeInBits() == LdMemSize)
16083 return ReplaceLd(LD, ST->getValue(), Chain);
16084 // Can we model the truncate and extension with an and mask?
16085 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
16086 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
16087 // Mask to size of LDMemType
16088 auto Mask =
16089 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
16090 StMemSize.getFixedSize()),
16091 SDLoc(ST), STType);
16092 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
16093 return ReplaceLd(LD, Val, Chain);
16097 // TODO: Deal with nonzero offset.
16098 if (LD->getBasePtr().isUndef() || Offset != 0)
16099 return SDValue();
16100 // Model necessary truncations / extensions.
16101 SDValue Val;
16102 // Truncate Value To Stored Memory Size.
16103 do {
16104 if (!getTruncatedStoreValue(ST, Val))
16105 continue;
16106 if (!isTypeLegal(LDMemType))
16107 continue;
16108 if (STMemType != LDMemType) {
16109 // TODO: Support vectors? This requires extract_subvector/bitcast.
16110 if (!STMemType.isVector() && !LDMemType.isVector() &&
16111 STMemType.isInteger() && LDMemType.isInteger())
16112 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
16113 else
16114 continue;
16116 if (!extendLoadedValueToExtension(LD, Val))
16117 continue;
16118 return ReplaceLd(LD, Val, Chain);
16119 } while (false);
16121 // On failure, cleanup dead nodes we may have created.
16122 if (Val->use_empty())
16123 deleteAndRecombine(Val.getNode());
16124 return SDValue();
16127 SDValue DAGCombiner::visitLOAD(SDNode *N) {
16128 LoadSDNode *LD = cast<LoadSDNode>(N);
16129 SDValue Chain = LD->getChain();
16130 SDValue Ptr = LD->getBasePtr();
16132 // If load is not volatile and there are no uses of the loaded value (and
16133 // the updated indexed value in case of indexed loads), change uses of the
16134 // chain value into uses of the chain input (i.e. delete the dead load).
16135 // TODO: Allow this for unordered atomics (see D66309)
16136 if (LD->isSimple()) {
16137 if (N->getValueType(1) == MVT::Other) {
16138 // Unindexed loads.
16139 if (!N->hasAnyUseOfValue(0)) {
16140 // It's not safe to use the two value CombineTo variant here. e.g.
16141 // v1, chain2 = load chain1, loc
16142 // v2, chain3 = load chain2, loc
16143 // v3 = add v2, c
16144 // Now we replace use of chain2 with chain1. This makes the second load
16145 // isomorphic to the one we are deleting, and thus makes this load live.
16146 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
16147 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
16148 dbgs() << "\n");
16149 WorklistRemover DeadNodes(*this);
16150 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16151 AddUsersToWorklist(Chain.getNode());
16152 if (N->use_empty())
16153 deleteAndRecombine(N);
16155 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16157 } else {
16158 // Indexed loads.
16159 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
16161 // If this load has an opaque TargetConstant offset, then we cannot split
16162 // the indexing into an add/sub directly (that TargetConstant may not be
16163 // valid for a different type of node, and we cannot convert an opaque
16164 // target constant into a regular constant).
16165 bool CanSplitIdx = canSplitIdx(LD);
16167 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
16168 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
16169 SDValue Index;
16170 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
16171 Index = SplitIndexingFromLoad(LD);
16172 // Try to fold the base pointer arithmetic into subsequent loads and
16173 // stores.
16174 AddUsersToWorklist(N);
16175 } else
16176 Index = DAG.getUNDEF(N->getValueType(1));
16177 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
16178 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
16179 dbgs() << " and 2 other values\n");
16180 WorklistRemover DeadNodes(*this);
16181 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
16182 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
16183 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
16184 deleteAndRecombine(N);
16185 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16190 // If this load is directly stored, replace the load value with the stored
16191 // value.
16192 if (auto V = ForwardStoreValueToDirectLoad(LD))
16193 return V;
16195 // Try to infer better alignment information than the load already has.
16196 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
16197 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
16198 if (*Alignment > LD->getAlign() &&
16199 isAligned(*Alignment, LD->getSrcValueOffset())) {
16200 SDValue NewLoad = DAG.getExtLoad(
16201 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
16202 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
16203 LD->getMemOperand()->getFlags(), LD->getAAInfo());
16204 // NewLoad will always be N as we are only refining the alignment
16205 assert(NewLoad.getNode() == N);
16206 (void)NewLoad;
16211 if (LD->isUnindexed()) {
16212 // Walk up chain skipping non-aliasing memory nodes.
16213 SDValue BetterChain = FindBetterChain(LD, Chain);
16215 // If there is a better chain.
16216 if (Chain != BetterChain) {
16217 SDValue ReplLoad;
16219 // Replace the chain to avoid the dependency.
16220 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
16221 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
16222 BetterChain, Ptr, LD->getMemOperand());
16223 } else {
16224 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
16225 LD->getValueType(0),
16226 BetterChain, Ptr, LD->getMemoryVT(),
16227 LD->getMemOperand());
16230 // Create token factor to keep old chain connected.
16231 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
16232 MVT::Other, Chain, ReplLoad.getValue(1));
16234 // Replace uses with load result and token factor
16235 return CombineTo(N, ReplLoad.getValue(0), Token);
16239 // Try transforming N to an indexed load.
16240 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16241 return SDValue(N, 0);
16243 // Try to slice up N to more direct loads if the slices are mapped to
16244 // different register banks or pairing can take place.
16245 if (SliceUpLoad(N))
16246 return SDValue(N, 0);
16248 return SDValue();
16251 namespace {
16253 /// Helper structure used to slice a load in smaller loads.
16254 /// Basically a slice is obtained from the following sequence:
16255 /// Origin = load Ty1, Base
16256 /// Shift = srl Ty1 Origin, CstTy Amount
16257 /// Inst = trunc Shift to Ty2
16259 /// Then, it will be rewritten into:
16260 /// Slice = load SliceTy, Base + SliceOffset
16261 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16263 /// SliceTy is deduced from the number of bits that are actually used to
16264 /// build Inst.
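/// For example, on a little-endian target:
///   Origin = load i32 %base
///   Shift  = srl i32 Origin, 16
///   Inst   = trunc Shift to i8
/// uses only bits [16, 24) of the loaded value, so the slice becomes
///   Slice = load i8, %base + 2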
16265 struct LoadedSlice {
16266 /// Helper structure used to compute the cost of a slice.
16267 struct Cost {
16268 /// Are we optimizing for code size.
16269 bool ForCodeSize = false;
16271 /// Various costs.
16272 unsigned Loads = 0;
16273 unsigned Truncates = 0;
16274 unsigned CrossRegisterBanksCopies = 0;
16275 unsigned ZExts = 0;
16276 unsigned Shift = 0;
16278 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16280 /// Get the cost of one isolated slice.
16281 Cost(const LoadedSlice &LS, bool ForCodeSize)
16282 : ForCodeSize(ForCodeSize), Loads(1) {
16283 EVT TruncType = LS.Inst->getValueType(0);
16284 EVT LoadedType = LS.getLoadedType();
16285 if (TruncType != LoadedType &&
16286 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16287 ZExts = 1;
16290 /// Account for slicing gain in the current cost.
16291 /// Slicing provides a few gains, like removing a shift or a
16292 /// truncate. This method allows the cost of the original
16293 /// load to grow by the gain from this slice.
16294 void addSliceGain(const LoadedSlice &LS) {
16295 // Each slice saves a truncate.
16296 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16297 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16298 LS.Inst->getValueType(0)))
16299 ++Truncates;
16300 // If there is a shift amount, this slice gets rid of it.
16301 if (LS.Shift)
16302 ++Shift;
16303 // If this slice can merge a cross register bank copy, account for it.
16304 if (LS.canMergeExpensiveCrossRegisterBankCopy())
16305 ++CrossRegisterBanksCopies;
16308 Cost &operator+=(const Cost &RHS) {
16309 Loads += RHS.Loads;
16310 Truncates += RHS.Truncates;
16311 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16312 ZExts += RHS.ZExts;
16313 Shift += RHS.Shift;
16314 return *this;
16317 bool operator==(const Cost &RHS) const {
16318 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16319 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16320 ZExts == RHS.ZExts && Shift == RHS.Shift;
16323 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16325 bool operator<(const Cost &RHS) const {
16326 // Assume cross register banks copies are as expensive as loads.
16327 // FIXME: Do we want some more target hooks?
16328 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16329 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16330 // Unless we are optimizing for code size, consider the
16331 // expensive operation first.
16332 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16333 return ExpensiveOpsLHS < ExpensiveOpsRHS;
16334 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16335 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16338 bool operator>(const Cost &RHS) const { return RHS < *this; }
16340 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16342 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16345 // The last instruction that represents the slice. This should be a
16346 // truncate instruction.
16347 SDNode *Inst;
16349 // The original load instruction.
16350 LoadSDNode *Origin;
16352 // The right shift amount in bits from the original load.
16353 unsigned Shift;
16355 // The DAG from which Origin came.
16356 // This is used to get some contextual information about legal types, etc.
16357 SelectionDAG *DAG;
16359 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16360 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16361 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16363 /// Get the bits used in a chunk of bits \p BitWidth large.
16364 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
16365 /// unused bits set to 0.
16366 APInt getUsedBits() const {
16367 // Reproduce the trunc(lshr) sequence:
16368 // - Start from the truncated value.
16369 // - Zero extend to the desired bit width.
16370 // - Shift left.
16371 assert(Origin && "No original load to compare against.");
16372 unsigned BitWidth = Origin->getValueSizeInBits(0);
16373 assert(Inst && "This slice is not bound to an instruction");
16374 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16375 "Extracted slice is bigger than the whole type!");
16376 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16377 UsedBits.setAllBits();
16378 UsedBits = UsedBits.zext(BitWidth);
16379 UsedBits <<= Shift;
16380 return UsedBits;
16383 /// Get the size of the slice to be loaded in bytes.
16384 unsigned getLoadedSize() const {
16385 unsigned SliceSize = getUsedBits().countPopulation();
16386 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16387 return SliceSize / 8;
16390 /// Get the type that will be loaded for this slice.
16391 /// Note: This may not be the final type for the slice.
16392 EVT getLoadedType() const {
16393 assert(DAG && "Missing context");
16394 LLVMContext &Ctxt = *DAG->getContext();
16395 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16398 /// Get the alignment of the load used for this slice.
16399 Align getAlign() const {
16400 Align Alignment = Origin->getAlign();
16401 uint64_t Offset = getOffsetFromBase();
16402 if (Offset != 0)
16403 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16404 return Alignment;
16407 /// Check if this slice can be rewritten with legal operations.
16408 bool isLegal() const {
16409 // An invalid slice is not legal.
16410 if (!Origin || !Inst || !DAG)
16411 return false;
16413 // Offsets are only used for indexed loads; we do not handle that.
16414 if (!Origin->getOffset().isUndef())
16415 return false;
16417 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16419 // Check that the type is legal.
16420 EVT SliceType = getLoadedType();
16421 if (!TLI.isTypeLegal(SliceType))
16422 return false;
16424 // Check that the load is legal for this type.
16425 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16426 return false;
16428 // Check that the offset can be computed.
16429 // 1. Check its type.
16430 EVT PtrType = Origin->getBasePtr().getValueType();
16431 if (PtrType == MVT::Untyped || PtrType.isExtended())
16432 return false;
16434 // 2. Check that it fits in the immediate.
16435 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16436 return false;
16438 // 3. Check that the computation is legal.
16439 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16440 return false;
16442 // Check that the zext is legal if it needs one.
16443 EVT TruncateType = Inst->getValueType(0);
16444 if (TruncateType != SliceType &&
16445 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16446 return false;
16448 return true;
16451 /// Get the offset in bytes of this slice in the original chunk of
16452 /// bits.
16453 /// \pre DAG != nullptr.
16454 uint64_t getOffsetFromBase() const {
16455 assert(DAG && "Missing context.");
16456 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16457 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16458 uint64_t Offset = Shift / 8;
16459 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16460 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16461 "The size of the original loaded type is not a multiple of a"
16462 " byte.");
16463 // If Offset is bigger than TySizeInBytes, it means we are loading all
16464 // zeros. This should have been optimized earlier in the process.
16465 assert(TySizeInBytes > Offset &&
16466 "Invalid shift amount for given loaded size");
16467 if (IsBigEndian)
16468 Offset = TySizeInBytes - Offset - getLoadedSize();
16469 return Offset;
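// Worked example (illustrative, i32 original load): with Shift == 16 the
// byte offset is 2 on a little-endian target; on a big-endian target the
// same 2-byte slice starts at 4 - 2 - getLoadedSize() == 0.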
16472 /// Generate the sequence of instructions to load the slice
16473 /// represented by this object and redirect the uses of this slice to
16474 /// this new sequence of instructions.
16475 /// \pre this->Inst && this->Origin are valid Instructions and this
16476 /// object passed the legal check: LoadedSlice::isLegal returned true.
16477 /// \return The last instruction of the sequence used to load the slice.
16478 SDValue loadSlice() const {
16479 assert(Inst && Origin && "Unable to replace a non-existing slice.");
16480 const SDValue &OldBaseAddr = Origin->getBasePtr();
16481 SDValue BaseAddr = OldBaseAddr;
16482 // Get the offset in that chunk of bytes w.r.t. the endianness.
16483 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16484 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16485 if (Offset) {
16486 // BaseAddr = BaseAddr + Offset.
16487 EVT ArithType = BaseAddr.getValueType();
16488 SDLoc DL(Origin);
16489 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16490 DAG->getConstant(Offset, DL, ArithType));
16493 // Create the type of the loaded slice according to its size.
16494 EVT SliceType = getLoadedType();
16496 // Create the load for the slice.
16497 SDValue LastInst =
16498 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16499 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16500 Origin->getMemOperand()->getFlags());
16501 // If the final type is not the same as the loaded type, this means that
16502 // we have to pad with zero. Create a zero extend for that.
16503 EVT FinalType = Inst->getValueType(0);
16504 if (SliceType != FinalType)
16505 LastInst =
16506 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16507 return LastInst;
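// Sketch of the rewrite performed here (little-endian, illustrative types):
//   x = load i32 @p
//   s = trunc (srl x, 16) to i16
// becomes
//   s = load i16 @(p + 2)
// followed by a zero extend when the user expects a wider type.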
16510 /// Check if this slice can be merged with an expensive cross register
16511 /// bank copy. E.g.,
16512 /// i = load i32
16513 /// f = bitcast i32 i to float
16514 bool canMergeExpensiveCrossRegisterBankCopy() const {
16515 if (!Inst || !Inst->hasOneUse())
16516 return false;
16517 SDNode *Use = *Inst->use_begin();
16518 if (Use->getOpcode() != ISD::BITCAST)
16519 return false;
16520 assert(DAG && "Missing context");
16521 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16522 EVT ResVT = Use->getValueType(0);
16523 const TargetRegisterClass *ResRC =
16524 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16525 const TargetRegisterClass *ArgRC =
16526 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16527 Use->getOperand(0)->isDivergent());
16528 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16529 return false;
16531 // At this point, we know that we perform a cross-register-bank copy.
16532 // Check if it is expensive.
16533 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16534 // Assume bitcasts are cheap unless the two register classes do not
16535 // explicitly share a common subclass.
16536 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16537 return false;
16539 // Check if it will be merged with the load.
16540 // 1. Check the alignment / fast memory access constraint.
16541 bool IsFast = false;
16542 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16543 Origin->getAddressSpace(), getAlign(),
16544 Origin->getMemOperand()->getFlags(), &IsFast) ||
16545 !IsFast)
16546 return false;
16548 // 2. Check that the load is a legal operation for that type.
16549 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16550 return false;
16552 // 3. Check that we do not have a zext in the way.
16553 if (Inst->getValueType(0) != getLoadedType())
16554 return false;
16556 return true;
16560 } // end anonymous namespace
16562 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16563 /// \p UsedBits looks like 0..0 1..1 0..0.
16564 static bool areUsedBitsDense(const APInt &UsedBits) {
16565 // If all the bits are one, this is dense!
16566 if (UsedBits.isAllOnes())
16567 return true;
16569 // Get rid of the unused bits on the right.
16570 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16571 // Get rid of the unused bits on the left.
16572 if (NarrowedUsedBits.countLeadingZeros())
16573 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16574 // Check that the chunk of bits is completely used.
16575 return NarrowedUsedBits.isAllOnes();
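// For example, 0x00FF0000 is dense (a single run of ones), whereas
// 0x00FF00FF is not, because its two runs of ones are separated by unused
// bits.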
16578 /// Check whether or not \p First and \p Second are next to each other
16579 /// in memory. This means that there is no hole between the bits loaded
16580 /// by \p First and the bits loaded by \p Second.
16581 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16582 const LoadedSlice &Second) {
16583 assert(First.Origin == Second.Origin && First.Origin &&
16584 "Unable to match different memory origins.");
16585 APInt UsedBits = First.getUsedBits();
16586 assert((UsedBits & Second.getUsedBits()) == 0 &&
16587 "Slices are not supposed to overlap.");
16588 UsedBits |= Second.getUsedBits();
16589 return areUsedBitsDense(UsedBits);
16592 /// Adjust the \p GlobalLSCost according to the target
16593 /// pairing capabilities and the layout of the slices.
16594 /// \pre \p GlobalLSCost should account for at least as many loads as
16595 /// there are in the slices in \p LoadedSlices.
16596 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16597 LoadedSlice::Cost &GlobalLSCost) {
16598 unsigned NumberOfSlices = LoadedSlices.size();
16599 // If there are fewer than 2 elements, no pairing is possible.
16600 if (NumberOfSlices < 2)
16601 return;
16603 // Sort the slices so that elements that are likely to be next to each
16604 // other in memory are next to each other in the list.
16605 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16606 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16607 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16609 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16610 // First (resp. Second) is the first (resp. second) potential candidate
16611 // to be placed in a paired load.
16612 const LoadedSlice *First = nullptr;
16613 const LoadedSlice *Second = nullptr;
16614 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16615 // Set the beginning of the pair.
16616 First = Second) {
16617 Second = &LoadedSlices[CurrSlice];
16619 // If First is NULL, it means we start a new pair.
16620 // Get to the next slice.
16621 if (!First)
16622 continue;
16624 EVT LoadedType = First->getLoadedType();
16626 // If the types of the slices are different, we cannot pair them.
16627 if (LoadedType != Second->getLoadedType())
16628 continue;
16630 // Check if the target supplies paired loads for this type.
16631 Align RequiredAlignment;
16632 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16633 // Move to the next pair; this type is hopeless.
16634 Second = nullptr;
16635 continue;
16637 // Check if we meet the alignment requirement.
16638 if (First->getAlign() < RequiredAlignment)
16639 continue;
16641 // Check that both loads are next to each other in memory.
16642 if (!areSlicesNextToEachOther(*First, *Second))
16643 continue;
16645 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16646 --GlobalLSCost.Loads;
16647 // Move to the next pair.
16648 Second = nullptr;
16652 /// Check the profitability of all involved LoadedSlice.
16653 /// Currently, it is considered profitable if there are exactly two
16654 /// involved slices (1) which are (2) next to each other in memory, and
16655 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16657 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
16658 /// the elements themselves.
16660 /// FIXME: When the cost model is mature enough, we can relax
16661 /// constraints (1) and (2).
16662 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16663 const APInt &UsedBits, bool ForCodeSize) {
16664 unsigned NumberOfSlices = LoadedSlices.size();
16665 if (StressLoadSlicing)
16666 return NumberOfSlices > 1;
16668 // Check (1).
16669 if (NumberOfSlices != 2)
16670 return false;
16672 // Check (2).
16673 if (!areUsedBitsDense(UsedBits))
16674 return false;
16676 // Check (3).
16677 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16678 // The original code has one big load.
16679 OrigCost.Loads = 1;
16680 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16681 const LoadedSlice &LS = LoadedSlices[CurrSlice];
16682 // Accumulate the cost of all the slices.
16683 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16684 GlobalSlicingCost += SliceCost;
16686 // Account as cost in the original configuration the gain obtained
16687 // with the current slices.
16688 OrigCost.addSliceGain(LS);
16691 // If the target supports paired load, adjust the cost accordingly.
16692 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16693 return OrigCost > GlobalSlicingCost;
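// For instance (illustrative), slicing one i64 load into two i32 slices
// trades 1 load for 2, but removes a truncate and a shift from each slice;
// if the target can pair the two narrow loads, adjustCostForPairing charges
// only one load for both slices and the slicing becomes profitable.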
16696 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
16697 /// operations, split it into the various pieces being extracted.
16699 /// This sort of thing is introduced by SROA.
16700 /// This slicing takes care not to insert overlapping loads.
16701 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
16702 bool DAGCombiner::SliceUpLoad(SDNode *N) {
16703 if (Level < AfterLegalizeDAG)
16704 return false;
16706 LoadSDNode *LD = cast<LoadSDNode>(N);
16707 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16708 !LD->getValueType(0).isInteger())
16709 return false;
16711 // The algorithm to split up a load of a scalable vector into individual
16712 // elements currently requires knowing the length of the loaded type,
16713 // so will need adjusting to work on scalable vectors.
16714 if (LD->getValueType(0).isScalableVector())
16715 return false;
16717 // Keep track of already used bits to detect overlapping values.
16718 // In that case, we will just abort the transformation.
16719 APInt UsedBits(LD->getValueSizeInBits(0), 0);
16721 SmallVector<LoadedSlice, 4> LoadedSlices;
16723 // Check if this load is used as several smaller chunks of bits.
16724 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16725 // of computation for each trunc.
16726 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16727 UI != UIEnd; ++UI) {
16728 // Skip the uses of the chain.
16729 if (UI.getUse().getResNo() != 0)
16730 continue;
16732 SDNode *User = *UI;
16733 unsigned Shift = 0;
16735 // Check if this is a trunc(lshr).
16736 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16737 isa<ConstantSDNode>(User->getOperand(1))) {
16738 Shift = User->getConstantOperandVal(1);
16739 User = *User->use_begin();
16742 // At this point, User is a truncate iff we encountered trunc or
16743 // trunc(lshr).
16744 if (User->getOpcode() != ISD::TRUNCATE)
16745 return false;
16747 // The width of the type must be a power of 2 and at least 8 bits.
16748 // Otherwise the load cannot be represented in LLVM IR.
16749 // Moreover, if the shift amount is not a multiple of 8 bits, the slice
16750 // will not be byte-aligned. We do not support that.
16751 unsigned Width = User->getValueSizeInBits(0);
16752 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16753 return false;
16755 // Build the slice for this chain of computations.
16756 LoadedSlice LS(User, LD, Shift, &DAG);
16757 APInt CurrentUsedBits = LS.getUsedBits();
16759 // Check if this slice overlaps with another.
16760 if ((CurrentUsedBits & UsedBits) != 0)
16761 return false;
16762 // Update the bits used globally.
16763 UsedBits |= CurrentUsedBits;
16765 // Check if the new slice would be legal.
16766 if (!LS.isLegal())
16767 return false;
16769 // Record the slice.
16770 LoadedSlices.push_back(LS);
16773 // Abort slicing if it does not seem to be profitable.
16774 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16775 return false;
16777 ++SlicedLoads;
16779 // Rewrite each chain to use an independent load.
16780 // By construction, each chain can be represented by a unique load.
16782 // Prepare the arguments for the new token factor for all the slices.
16783 SmallVector<SDValue, 8> ArgChains;
16784 for (const LoadedSlice &LS : LoadedSlices) {
16785 SDValue SliceInst = LS.loadSlice();
16786 CombineTo(LS.Inst, SliceInst, true);
16787 if (SliceInst.getOpcode() != ISD::LOAD)
16788 SliceInst = SliceInst.getOperand(0);
16789 assert(SliceInst->getOpcode() == ISD::LOAD &&
16790 "It takes more than a zext to get to the loaded slice!!");
16791 ArgChains.push_back(SliceInst.getValue(1));
16794 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16795 ArgChains);
16796 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16797 AddToWorklist(Chain.getNode());
16798 return true;
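// Overall effect of SliceUpLoad on a typical SROA-produced pattern
// (little-endian, illustrative types):
//   x  = load i32 @p
//   lo = trunc x to i16
//   hi = trunc (srl x, 16) to i16
// is rewritten as two independent narrow loads:
//   lo = load i16 @p
//   hi = load i16 @(p + 2)
// with a token factor tying the new chains to the original load's chain uses.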
16801 /// Check to see if V is (and (load ptr), imm), where the load has
16802 /// specific bytes cleared out. If so, return the byte size being masked out
16803 /// and the shift amount.
16804 static std::pair<unsigned, unsigned>
16805 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16806 std::pair<unsigned, unsigned> Result(0, 0);
16808 // Check for the structure we're looking for.
16809 if (V->getOpcode() != ISD::AND ||
16810 !isa<ConstantSDNode>(V->getOperand(1)) ||
16811 !ISD::isNormalLoad(V->getOperand(0).getNode()))
16812 return Result;
16814 // Check the chain and pointer.
16815 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16816 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16818 // This only handles simple types.
16819 if (V.getValueType() != MVT::i16 &&
16820 V.getValueType() != MVT::i32 &&
16821 V.getValueType() != MVT::i64)
16822 return Result;
16824 // Check the constant mask. Invert it so that the bits being masked out are
16825 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
16826 // follow the sign bit for uniformity.
16827 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16828 unsigned NotMaskLZ = countLeadingZeros(NotMask);
16829 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16830 unsigned NotMaskTZ = countTrailingZeros(NotMask);
16831 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16832 if (NotMaskLZ == 64) return Result; // All zero mask.
16834 // See if we have a continuous run of bits. If so, we have 0*1+0*
16835 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16836 return Result;
16838 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16839 if (V.getValueType() != MVT::i64 && NotMaskLZ)
16840 NotMaskLZ -= 64-V.getValueSizeInBits();
16842 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16843 switch (MaskedBytes) {
16844 case 1:
16845 case 2:
16846 case 4: break;
16847 default: return Result; // All one mask, or 5-byte mask.
16850 // Verify that the masked region starts at a byte offset that is a multiple
16851 // of its width, so that the access is aligned the same as the access width.
16852 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16854 // For narrowing to be valid, it must be the case that the load is the
16855 // memory operation immediately preceding the store.
16856 if (LD == Chain.getNode())
16857 ; // ok.
16858 else if (Chain->getOpcode() == ISD::TokenFactor &&
16859 SDValue(LD, 1).hasOneUse()) {
16860 // LD has only 1 chain use, so there are no indirect dependencies.
16861 if (!LD->isOperandOf(Chain.getNode()))
16862 return Result;
16863 } else
16864 return Result; // Fail.
16866 Result.first = MaskedBytes;
16867 Result.second = NotMaskTZ/8;
16868 return Result;
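// Worked example (illustrative i32 values): for
//   V = (and (load p), 0xFFFF00FF)
// the inverted mask is 0x0000FF00, i.e. exactly one byte starting at byte
// offset 1 is cleared, so this returns {1 /*MaskedBytes*/, 1 /*ByteShift*/}.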
16871 /// Check to see if IVal is something that provides a value as specified by
16872 /// MaskInfo. If so, replace the specified store with a narrower store of
16873 /// truncated IVal.
16874 static SDValue
16875 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16876 SDValue IVal, StoreSDNode *St,
16877 DAGCombiner *DC) {
16878 unsigned NumBytes = MaskInfo.first;
16879 unsigned ByteShift = MaskInfo.second;
16880 SelectionDAG &DAG = DC->getDAG();
16882 // Check to see if IVal is all zeros in the part being masked in by the 'or'
16883 // that uses this. If not, this is not a replacement.
16884 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16885 ByteShift*8, (ByteShift+NumBytes)*8);
16886 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16888 // Check that it is legal on the target to do this. It is legal if the new
16889 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16890 // legalization (and the target doesn't explicitly think this is a bad idea).
16891 MVT VT = MVT::getIntegerVT(NumBytes * 8);
16892 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16893 if (!DC->isTypeLegal(VT))
16894 return SDValue();
16895 if (St->getMemOperand() &&
16896 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16897 *St->getMemOperand()))
16898 return SDValue();
16900 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
16901 // shifted by ByteShift and truncated down to NumBytes.
16902 if (ByteShift) {
16903 SDLoc DL(IVal);
16904 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16905 DAG.getConstant(ByteShift*8, DL,
16906 DC->getShiftAmountTy(IVal.getValueType())));
16909 // Figure out the offset for the store and the alignment of the access.
16910 unsigned StOffset;
16911 if (DAG.getDataLayout().isLittleEndian())
16912 StOffset = ByteShift;
16913 else
16914 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16916 SDValue Ptr = St->getBasePtr();
16917 if (StOffset) {
16918 SDLoc DL(IVal);
16919 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16922 // Truncate down to the new size.
16923 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16925 ++OpsNarrowed;
16926 return DAG
16927 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16928 St->getPointerInfo().getWithOffset(StOffset),
16929 St->getOriginalAlign());
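// Putting the two helpers together (illustrative, little-endian): for
//   store (or (and (load p), 0xFFFF00FF), Y), p
// where Y is known to be zero outside byte 1, the whole load/or/store is
// replaced by a single i8 store of (trunc (srl Y, 8)) at address p + 1,
// which makes the wide load dead.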
16932 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16933 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16934 /// narrowing the load and store if it would end up being a win for performance
16935 /// or code size.
16936 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16937 StoreSDNode *ST = cast<StoreSDNode>(N);
16938 if (!ST->isSimple())
16939 return SDValue();
16941 SDValue Chain = ST->getChain();
16942 SDValue Value = ST->getValue();
16943 SDValue Ptr = ST->getBasePtr();
16944 EVT VT = Value.getValueType();
16946 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16947 return SDValue();
16949 unsigned Opc = Value.getOpcode();
16951 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16952 // is a byte mask indicating a consecutive number of bytes, check to see if
16953 // Y is known to provide just those bytes. If so, we try to replace the
16954 // load / or / store sequence with a single (narrower) store, which makes
16955 // the load dead.
16956 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16957 std::pair<unsigned, unsigned> MaskedLoad;
16958 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16959 if (MaskedLoad.first)
16960 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16961 Value.getOperand(1), ST,this))
16962 return NewST;
16964 // Or is commutative, so try swapping X and Y.
16965 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16966 if (MaskedLoad.first)
16967 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16968 Value.getOperand(0), ST,this))
16969 return NewST;
16972 if (!EnableReduceLoadOpStoreWidth)
16973 return SDValue();
16975 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16976 Value.getOperand(1).getOpcode() != ISD::Constant)
16977 return SDValue();
16979 SDValue N0 = Value.getOperand(0);
16980 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16981 Chain == SDValue(N0.getNode(), 1)) {
16982 LoadSDNode *LD = cast<LoadSDNode>(N0);
16983 if (LD->getBasePtr() != Ptr ||
16984 LD->getPointerInfo().getAddrSpace() !=
16985 ST->getPointerInfo().getAddrSpace())
16986 return SDValue();
16988 // Find the type to narrow the load / op / store to.
16989 SDValue N1 = Value.getOperand(1);
16990 unsigned BitWidth = N1.getValueSizeInBits();
16991 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16992 if (Opc == ISD::AND)
16993 Imm ^= APInt::getAllOnes(BitWidth);
16994 if (Imm == 0 || Imm.isAllOnes())
16995 return SDValue();
16996 unsigned ShAmt = Imm.countTrailingZeros();
16997 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16998 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16999 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17000 // The narrowing should be profitable, the load/store operation should be
17001 // legal (or custom) and the store size should be equal to the NewVT width.
17002 while (NewBW < BitWidth &&
17003 (NewVT.getStoreSizeInBits() != NewBW ||
17004 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17005 !TLI.isNarrowingProfitable(VT, NewVT))) {
17006 NewBW = NextPowerOf2(NewBW);
17007 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17009 if (NewBW >= BitWidth)
17010 return SDValue();
17012 // If the lowest changed bit does not start at a type bitwidth boundary,
17013 // start at the previous one.
17014 if (ShAmt % NewBW)
17015 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
17016 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
17017 std::min(BitWidth, ShAmt + NewBW));
17018 if ((Imm & Mask) == Imm) {
17019 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17020 if (Opc == ISD::AND)
17021 NewImm ^= APInt::getAllOnes(NewBW);
17022 uint64_t PtrOff = ShAmt / 8;
17023 // For big endian targets, we need to adjust the offset to the pointer to
17024 // load the correct bytes.
17025 if (DAG.getDataLayout().isBigEndian())
17026 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17028 bool IsFast = false;
17029 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17030 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17031 LD->getAddressSpace(), NewAlign,
17032 LD->getMemOperand()->getFlags(), &IsFast) ||
17033 !IsFast)
17034 return SDValue();
17036 SDValue NewPtr =
17037 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
17038 SDValue NewLD =
17039 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17040 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17041 LD->getMemOperand()->getFlags(), LD->getAAInfo());
17042 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17043 DAG.getConstant(NewImm, SDLoc(Value),
17044 NewVT));
17045 SDValue NewST =
17046 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17047 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17049 AddToWorklist(NewPtr.getNode());
17050 AddToWorklist(NewLD.getNode());
17051 AddToWorklist(NewVal.getNode());
17052 WorklistRemover DeadNodes(*this);
17053 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17054 ++OpsNarrowed;
17055 return NewST;
17059 return SDValue();
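// Worked example for the narrowing path above (illustrative, little-endian):
//   store (or (load p:i32), 0x00FF0000), p
// only changes byte 2 of the stored value, so it can be narrowed to
//   store (or (load (p + 2):i8), 0xFF), (p + 2)
// provided the i8 operation is legal and the narrower access is fast.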
17062 /// For a given floating point load / store pair, if the load value isn't used
17063 /// by any other operations, then consider transforming the pair to integer
17064 /// load / store operations if the target deems the transformation profitable.
17065 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17066 StoreSDNode *ST = cast<StoreSDNode>(N);
17067 SDValue Value = ST->getValue();
17068 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17069 Value.hasOneUse()) {
17070 LoadSDNode *LD = cast<LoadSDNode>(Value);
17071 EVT VT = LD->getMemoryVT();
17072 if (!VT.isFloatingPoint() ||
17073 VT != ST->getMemoryVT() ||
17074 LD->isNonTemporal() ||
17075 ST->isNonTemporal() ||
17076 LD->getPointerInfo().getAddrSpace() != 0 ||
17077 ST->getPointerInfo().getAddrSpace() != 0)
17078 return SDValue();
17080 TypeSize VTSize = VT.getSizeInBits();
17082 // We don't know the size of scalable types at compile time so we cannot
17083 // create an integer of the equivalent size.
17084 if (VTSize.isScalable())
17085 return SDValue();
17087 bool FastLD = false, FastST = false;
17088 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
17089 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17090 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
17091 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
17092 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
17093 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17094 *LD->getMemOperand(), &FastLD) ||
17095 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17096 *ST->getMemOperand(), &FastST) ||
17097 !FastLD || !FastST)
17098 return SDValue();
17100 SDValue NewLD =
17101 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17102 LD->getPointerInfo(), LD->getAlign());
17104 SDValue NewST =
17105 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17106 ST->getPointerInfo(), ST->getAlign());
17108 AddToWorklist(NewLD.getNode());
17109 AddToWorklist(NewST.getNode());
17110 WorklistRemover DeadNodes(*this);
17111 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17112 ++LdStFP2Int;
17113 return NewST;
17116 return SDValue();
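// For example, a pure f64 copy such as
//   store (load f64 @src), @dst
// can become
//   store (load i64 @src), @dst
// when the target reports the integer load/store as both legal and desirable.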
17119 // This is a helper function for visitMUL to check the profitability
17120 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17121 // MulNode is the original multiply, AddNode is (add x, c1),
17122 // and ConstNode is c2.
17124 // If the (add x, c1) has multiple uses, we could increase
17125 // the number of adds if we make this transformation.
17126 // It would only be worth doing this if we can remove a
17127 // multiply in the process. Check for that here.
17128 // To illustrate:
17129 // (A + c1) * c3
17130 // (A + c2) * c3
17131 // We're checking for cases where we have common "c3 * A" expressions.
17132 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
17133 SDValue &AddNode,
17134 SDValue &ConstNode) {
17135 APInt Val;
17137 // If the add only has one use, and the target thinks the folding is
17138 // profitable or does not lead to worse code, this would be OK to do.
17139 if (AddNode.getNode()->hasOneUse() &&
17140 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
17141 return true;
17143 // Walk all the users of the constant with which we're multiplying.
17144 for (SDNode *Use : ConstNode->uses()) {
17145 if (Use == MulNode) // This use is the one we're on right now. Skip it.
17146 continue;
17148 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17149 SDNode *OtherOp;
17150 SDNode *MulVar = AddNode.getOperand(0).getNode();
17152 // OtherOp is what we're multiplying against the constant.
17153 if (Use->getOperand(0) == ConstNode)
17154 OtherOp = Use->getOperand(1).getNode();
17155 else
17156 OtherOp = Use->getOperand(0).getNode();
17158 // Check to see if the multiply is with the same operand as our "add".
17160 // ConstNode = CONST
17161 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
17162 // ...
17163 // AddNode = (A + c1) <-- MulVar is A.
17164 // = AddNode * ConstNode <-- current visiting instruction.
17166 // If we make this transformation, we will have a common
17167 // multiply (ConstNode * A) that we can save.
17168 if (OtherOp == MulVar)
17169 return true;
17171 // Now check to see if a future expansion will give us a common
17172 // multiply.
17174 // ConstNode = CONST
17175 // AddNode = (A + c1)
17176 // ... = AddNode * ConstNode <-- current visiting instruction.
17177 // ...
17178 // OtherOp = (A + c2)
17179 // Use = OtherOp * ConstNode <-- visiting Use.
17181 // If we make this transformation, we will have a common
17182 // multiply (CONST * A) after we also do the same transformation
17183 // to the "t2" instruction.
17184 if (OtherOp->getOpcode() == ISD::ADD &&
17185 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
17186 OtherOp->getOperand(0).getNode() == MulVar)
17187 return true;
17191 // Didn't find a case where this would be profitable.
17192 return false;
17195 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17196 unsigned NumStores) {
17197 SmallVector<SDValue, 8> Chains;
17198 SmallPtrSet<const SDNode *, 8> Visited;
17199 SDLoc StoreDL(StoreNodes[0].MemNode);
17201 for (unsigned i = 0; i < NumStores; ++i) {
17202 Visited.insert(StoreNodes[i].MemNode);
17205 // don't include nodes that are children or repeated nodes.
17206 for (unsigned i = 0; i < NumStores; ++i) {
17207 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17208 Chains.push_back(StoreNodes[i].MemNode->getChain());
17211 assert(Chains.size() > 0 && "Chain should have generated a chain");
17212 return DAG.getTokenFactor(StoreDL, Chains);
17215 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17216 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17217 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17218 // Make sure we have something to merge.
17219 if (NumStores < 2)
17220 return false;
17222 assert((!UseTrunc || !UseVector) &&
17223 "This optimization cannot emit a vector truncating store");
17225 // The latest Node in the DAG.
17226 SDLoc DL(StoreNodes[0].MemNode);
17228 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17229 unsigned SizeInBits = NumStores * ElementSizeBits;
17230 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17232 Optional<MachineMemOperand::Flags> Flags;
17233 AAMDNodes AAInfo;
17234 for (unsigned I = 0; I != NumStores; ++I) {
17235 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17236 if (!Flags) {
17237 Flags = St->getMemOperand()->getFlags();
17238 AAInfo = St->getAAInfo();
17239 continue;
17241 // Skip merging if there's an inconsistent flag.
17242 if (Flags != St->getMemOperand()->getFlags())
17243 return false;
17244 // Concatenate AA metadata.
17245 AAInfo = AAInfo.concat(St->getAAInfo());
17248 EVT StoreTy;
17249 if (UseVector) {
17250 unsigned Elts = NumStores * NumMemElts;
17251 // Get the type for the merged vector store.
17252 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17253 } else
17254 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17256 SDValue StoredVal;
17257 if (UseVector) {
17258 if (IsConstantSrc) {
17259 SmallVector<SDValue, 8> BuildVector;
17260 for (unsigned I = 0; I != NumStores; ++I) {
17261 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17262 SDValue Val = St->getValue();
17263 // If constant is of the wrong type, convert it now.
17264 if (MemVT != Val.getValueType()) {
17265 Val = peekThroughBitcasts(Val);
17266 // Deal with constants of wrong size.
17267 if (ElementSizeBits != Val.getValueSizeInBits()) {
17268 EVT IntMemVT =
17269 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17270 if (isa<ConstantFPSDNode>(Val)) {
17271 // Not clear how to truncate FP values.
17272 return false;
17273 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
17274 Val = DAG.getConstant(C->getAPIntValue()
17275 .zextOrTrunc(Val.getValueSizeInBits())
17276 .zextOrTrunc(ElementSizeBits),
17277 SDLoc(C), IntMemVT);
17279 // Make sure the correctly sized value also has the correct type.
17280 Val = DAG.getBitcast(MemVT, Val);
17282 BuildVector.push_back(Val);
17284 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17285 : ISD::BUILD_VECTOR,
17286 DL, StoreTy, BuildVector);
17287 } else {
17288 SmallVector<SDValue, 8> Ops;
17289 for (unsigned i = 0; i < NumStores; ++i) {
17290 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17291 SDValue Val = peekThroughBitcasts(St->getValue());
17292 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17293 // type MemVT. If the underlying value is not the correct
17294 // type, but it is an extraction of an appropriate vector we
17295 // can recast Val to be of the correct type. This may require
17296 // converting between EXTRACT_VECTOR_ELT and
17297 // EXTRACT_SUBVECTOR.
17298 if ((MemVT != Val.getValueType()) &&
17299 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17300 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17301 EVT MemVTScalarTy = MemVT.getScalarType();
17302 // We may need to add a bitcast here to get types to line up.
17303 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17304 Val = DAG.getBitcast(MemVT, Val);
17305 } else {
17306 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17307 : ISD::EXTRACT_VECTOR_ELT;
17308 SDValue Vec = Val.getOperand(0);
17309 SDValue Idx = Val.getOperand(1);
17310 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17313 Ops.push_back(Val);
17316 // Build the extracted vector elements back into a vector.
17317 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17318 : ISD::BUILD_VECTOR,
17319 DL, StoreTy, Ops);
17321 } else {
17322 // We should always use a vector store when merging extracted vector
17323 // elements, so this path implies a store of constants.
17324 assert(IsConstantSrc && "Merged vector elements should use vector store");
17326 APInt StoreInt(SizeInBits, 0);
17328 // Construct a single integer constant which is made of the smaller
17329 // constant inputs.
17330 bool IsLE = DAG.getDataLayout().isLittleEndian();
17331 for (unsigned i = 0; i < NumStores; ++i) {
17332 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17333 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17335 SDValue Val = St->getValue();
17336 Val = peekThroughBitcasts(Val);
17337 StoreInt <<= ElementSizeBits;
17338 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17339 StoreInt |= C->getAPIntValue()
17340 .zextOrTrunc(ElementSizeBits)
17341 .zextOrTrunc(SizeInBits);
17342 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17343 StoreInt |= C->getValueAPF()
17344 .bitcastToAPInt()
17345 .zextOrTrunc(ElementSizeBits)
17346 .zextOrTrunc(SizeInBits);
17347 // If fp truncation is necessary give up for now.
17348 if (MemVT.getSizeInBits() != ElementSizeBits)
17349 return false;
17350 } else {
17351 llvm_unreachable("Invalid constant element type");
17355 // Create the new Load and Store operations.
17356 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17359 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17360 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17362 // Make sure we use a trunc store if it's necessary to be legal.
17363 SDValue NewStore;
17364 if (!UseTrunc) {
17365 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17366 FirstInChain->getPointerInfo(),
17367 FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17368 } else { // Must be realized as a trunc store
17369 EVT LegalizedStoredValTy =
17370 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17371 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17372 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17373 SDValue ExtendedStoreVal =
17374 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17375 LegalizedStoredValTy);
17376 NewStore = DAG.getTruncStore(
17377 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17378 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17379 FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17382 // Replace all merged stores with the new store.
17383 for (unsigned i = 0; i < NumStores; ++i)
17384 CombineTo(StoreNodes[i].MemNode, NewStore);
17386 AddToWorklist(NewChain.getNode());
17387 return true;
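// For example (illustrative, little-endian), four consecutive i8 constant
// stores of 0x01, 0x02, 0x03, 0x04 at offsets 0..3 can be merged into a
// single i32 store of 0x04030201, or into a v4i8 vector store when UseVector
// is set and that vector type is legal.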
17390 void DAGCombiner::getStoreMergeCandidates(
17391 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17392 SDNode *&RootNode) {
17393 // This holds the base pointer, index, and the offset in bytes from the base
17394 // pointer. We must have a base and an offset. Do not handle stores to undef
17395 // base pointers.
17396 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17397 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17398 return;
17400 SDValue Val = peekThroughBitcasts(St->getValue());
17401 StoreSource StoreSrc = getStoreSource(Val);
17402 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17404 // Match on the load base pointer if relevant.
17405 EVT MemVT = St->getMemoryVT();
17406 BaseIndexOffset LBasePtr;
17407 EVT LoadVT;
17408 if (StoreSrc == StoreSource::Load) {
17409 auto *Ld = cast<LoadSDNode>(Val);
17410 LBasePtr = BaseIndexOffset::match(Ld, DAG);
17411 LoadVT = Ld->getMemoryVT();
17412 // Load and store should be the same type.
17413 if (MemVT != LoadVT)
17414 return;
17415 // Loads must only have one use.
17416 if (!Ld->hasNUsesOfValue(1, 0))
17417 return;
17418 // The memory operands must not be volatile/indexed/atomic.
17419 // TODO: May be able to relax for unordered atomics (see D66309)
17420 if (!Ld->isSimple() || Ld->isIndexed())
17421 return;
17423 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17424 int64_t &Offset) -> bool {
17425 // The memory operands must not be volatile/indexed/atomic.
17426 // TODO: May be able to relax for unordered atomics (see D66309)
17427 if (!Other->isSimple() || Other->isIndexed())
17428 return false;
17429 // Don't mix temporal stores with non-temporal stores.
17430 if (St->isNonTemporal() != Other->isNonTemporal())
17431 return false;
17432 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17433 // Allow merging constants of different types as integers.
17434 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17435 : Other->getMemoryVT() != MemVT;
17436 switch (StoreSrc) {
17437 case StoreSource::Load: {
17438 if (NoTypeMatch)
17439 return false;
17440 // The Load's Base Ptr must also match.
17441 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17442 if (!OtherLd)
17443 return false;
17444 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17445 if (LoadVT != OtherLd->getMemoryVT())
17446 return false;
17447 // Loads must only have one use.
17448 if (!OtherLd->hasNUsesOfValue(1, 0))
17449 return false;
17450 // The memory operands must not be volatile/indexed/atomic.
17451 // TODO: May be able to relax for unordered atomics (see D66309)
17452 if (!OtherLd->isSimple() || OtherLd->isIndexed())
17453 return false;
17454 // Don't mix temporal loads with non-temporal loads.
17455 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17456 return false;
17457 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17458 return false;
17459 break;
17461 case StoreSource::Constant:
17462 if (NoTypeMatch)
17463 return false;
17464 if (!isIntOrFPConstant(OtherBC))
17465 return false;
17466 break;
17467 case StoreSource::Extract:
17468 // Do not merge truncated stores here.
17469 if (Other->isTruncatingStore())
17470 return false;
17471 if (!MemVT.bitsEq(OtherBC.getValueType()))
17472 return false;
17473 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17474 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17475 return false;
17476 break;
17477 default:
17478 llvm_unreachable("Unhandled store source for merging");
17480 Ptr = BaseIndexOffset::match(Other, DAG);
17481 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17484 // Check if the pair of StoreNode and RootNode has already bailed out of the
17485 // dependence check more times than the limit allows.
17486 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17487 SDNode *RootNode) -> bool {
17488 auto RootCount = StoreRootCountMap.find(StoreNode);
17489 return RootCount != StoreRootCountMap.end() &&
17490 RootCount->second.first == RootNode &&
17491 RootCount->second.second > StoreMergeDependenceLimit;
17494 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17495 // This must be a chain use.
17496 if (UseIter.getOperandNo() != 0)
17497 return;
17498 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17499 BaseIndexOffset Ptr;
17500 int64_t PtrDiff;
17501 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17502 !OverLimitInDependenceCheck(OtherStore, RootNode))
17503 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17507 // We are looking for a root node which is an ancestor to all mergeable
17508 // stores. We search up through a load, to our root and then down
17509 // through all children. For instance we will find Store{1,2,3} if
17510 // St is Store1, Store2, or Store3, where the root is not a load,
17511 // which is always true for nonvolatile ops. TODO: Expand
17512 // the search to find all valid candidates through multiple layers of loads.
17514 // Root
17515 // |-------|-------|
17516 // Load Load Store3
17517 // | |
17518 // Store1 Store2
17520 // FIXME: We should be able to climb and
17521 // descend TokenFactors to find candidates as well.
17523 RootNode = St->getChain().getNode();
17525 unsigned NumNodesExplored = 0;
17526 const unsigned MaxSearchNodes = 1024;
17527 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17528 RootNode = Ldn->getChain().getNode();
17529 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17530 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17531 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17532 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17533 TryToAddCandidate(I2);
17535 // Check stores that depend on the root (e.g. Store 3 in the chart above).
17536 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
17537 TryToAddCandidate(I);
17540 } else {
17541 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17542 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17543 TryToAddCandidate(I);
17547 // We need to check that merging these stores does not cause a loop in
17548 // the DAG. Any store candidate may depend on another candidate
17549 // indirectly through its operand (we already consider dependencies
17550 // through the chain). Check in parallel by searching up from
17551 // non-chain operands of candidates.
17552 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17553 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17554 SDNode *RootNode) {
17555 // FIXME: We should be able to truncate a full search of
17556 // predecessors by doing a BFS and keeping tabs on the originating
17557 // stores from which worklist nodes come, in a similar way to
17558 // TokenFactor simplification.
17560 SmallPtrSet<const SDNode *, 32> Visited;
17561 SmallVector<const SDNode *, 8> Worklist;
17563 // RootNode is a predecessor to all candidates so we need not search
17564 // past it. Add RootNode (peeking through TokenFactors). Do not count
17565 // these towards size check.
17567 Worklist.push_back(RootNode);
17568 while (!Worklist.empty()) {
17569 auto N = Worklist.pop_back_val();
17570 if (!Visited.insert(N).second)
17571 continue; // Already present in Visited.
17572 if (N->getOpcode() == ISD::TokenFactor) {
17573 for (SDValue Op : N->ops())
17574 Worklist.push_back(Op.getNode());
17578 // Don't count pruning nodes towards max.
17579 unsigned int Max = 1024 + Visited.size();
17580 // Search Ops of store candidates.
17581 for (unsigned i = 0; i < NumStores; ++i) {
17582 SDNode *N = StoreNodes[i].MemNode;
17583 // Of the 4 Store Operands:
17584 // * Chain (Op 0) -> We have already considered these
17585 // in candidate selection and can be
17586 // safely ignored
17587 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17588 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17589 // but aren't necessarily from the same base node, so
17590 // cycles are possible (e.g. via indexed store).
17591 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17592 // non-indexed stores). Not constant on all targets (e.g. ARM)
17593 // and so can participate in a cycle.
17594 for (unsigned j = 1; j < N->getNumOperands(); ++j)
17595 Worklist.push_back(N->getOperand(j).getNode());
17597 // Search through DAG. We can stop early if we find a store node.
17598 for (unsigned i = 0; i < NumStores; ++i)
17599 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17600 Max)) {
17601 // If the search bails out, record the StoreNode and RootNode in the
17602 // StoreRootCountMap. If we have seen the pair more times than the limit,
17603 // we won't add the StoreNode into the StoreNodes set again.
17604 if (Visited.size() >= Max) {
17605 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17606 if (RootCount.first == RootNode)
17607 RootCount.second++;
17608 else
17609 RootCount = {RootNode, 1};
17611 return false;
17613 return true;
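// A cycle could arise, for example, if one candidate store's value operand
// were a load whose chain transitively depended on another candidate store;
// merging the two would make the merged store a predecessor of itself.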
17616 unsigned
17617 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17618 int64_t ElementSizeBytes) const {
17619 while (true) {
17620 // Find a store past the width of the first store.
17621 size_t StartIdx = 0;
17622 while ((StartIdx + 1 < StoreNodes.size()) &&
17623 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17624 StoreNodes[StartIdx + 1].OffsetFromBase)
17625 ++StartIdx;
17627 // Bail if we don't have enough candidates to merge.
17628 if (StartIdx + 1 >= StoreNodes.size())
17629 return 0;
17631 // Trim stores that overlapped with the first store.
17632 if (StartIdx)
17633 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17635 // Scan the memory operations on the chain and find the first
17636 // non-consecutive store memory address.
17637 unsigned NumConsecutiveStores = 1;
17638 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17639 // Check that the addresses are consecutive starting from the second
17640 // element in the list of stores.
17641 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17642 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17643 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17644 break;
17645 NumConsecutiveStores = i + 1;
17647 if (NumConsecutiveStores > 1)
17648 return NumConsecutiveStores;
17650 // There are no consecutive stores at the start of the list.
17651 // Remove the first store and try again.
17652 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
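// For example, with ElementSizeBytes == 4 and candidate offsets
// {0, 4, 8, 20}, the first three stores are consecutive and this returns 3;
// the store at offset 20 stays in StoreNodes for a later attempt.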
17656 bool DAGCombiner::tryStoreMergeOfConstants(
17657 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17658 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17659 LLVMContext &Context = *DAG.getContext();
17660 const DataLayout &DL = DAG.getDataLayout();
17661 int64_t ElementSizeBytes = MemVT.getStoreSize();
17662 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17663 bool MadeChange = false;
17665 // Store the constants into memory as one consecutive store.
17666 while (NumConsecutiveStores >= 2) {
17667 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17668 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17669 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17670 unsigned LastLegalType = 1;
17671 unsigned LastLegalVectorType = 1;
17672 bool LastIntegerTrunc = false;
17673 bool NonZero = false;
17674 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17675 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17676 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17677 SDValue StoredVal = ST->getValue();
17678 bool IsElementZero = false;
17679 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17680 IsElementZero = C->isZero();
17681 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17682 IsElementZero = C->getConstantFPValue()->isNullValue();
17683 if (IsElementZero) {
17684 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17685 FirstZeroAfterNonZero = i;
17687 NonZero |= !IsElementZero;
17689 // Find a legal type for the constant store.
17690 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17691 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17692 bool IsFast = false;
17694 // Break early when size is too large to be legal.
17695 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17696 break;
17698 if (TLI.isTypeLegal(StoreTy) &&
17699 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17700 DAG.getMachineFunction()) &&
17701 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17702 *FirstInChain->getMemOperand(), &IsFast) &&
17703 IsFast) {
17704 LastIntegerTrunc = false;
17705 LastLegalType = i + 1;
17706 // Or check whether a truncstore is legal.
17707 } else if (TLI.getTypeAction(Context, StoreTy) ==
17708 TargetLowering::TypePromoteInteger) {
17709 EVT LegalizedStoredValTy =
17710 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17711 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17712 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17713 DAG.getMachineFunction()) &&
17714 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17715 *FirstInChain->getMemOperand(), &IsFast) &&
17716 IsFast) {
17717 LastIntegerTrunc = true;
17718 LastLegalType = i + 1;
17722 // We only use vectors if the constant is known to be zero or the
17723 // target allows it and the function is not marked with the
17724 // noimplicitfloat attribute.
17725 if ((!NonZero ||
17726 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17727 AllowVectors) {
17728 // Find a legal type for the vector store.
17729 unsigned Elts = (i + 1) * NumMemElts;
17730 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17731 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17732 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17733 TLI.allowsMemoryAccess(Context, DL, Ty,
17734 *FirstInChain->getMemOperand(), &IsFast) &&
17735 IsFast)
17736 LastLegalVectorType = i + 1;
17740 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17741 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17742 bool UseTrunc = LastIntegerTrunc && !UseVector;
17744 // Check if we found a legal integer type that creates a meaningful
17745 // merge.
17746 if (NumElem < 2) {
17747 // We know that candidate stores are in order and of correct
17748 // shape. While there is no mergeable sequence from the
17749 // beginning one may start later in the sequence. The only
17750 // reason a merge of size N could have failed where another of
17751 // the same size would not have, is if the alignment has
17752 // improved or we've dropped a non-zero value. Drop as many
17753 // candidates as we can here.
17754 unsigned NumSkip = 1;
17755 while ((NumSkip < NumConsecutiveStores) &&
17756 (NumSkip < FirstZeroAfterNonZero) &&
17757 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17758 NumSkip++;
17760 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17761 NumConsecutiveStores -= NumSkip;
17762 continue;
17765 // Check that we can merge these candidates without causing a cycle.
17766 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17767 RootNode)) {
17768 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17769 NumConsecutiveStores -= NumElem;
17770 continue;
17773 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17774 /*IsConstantSrc*/ true,
17775 UseVector, UseTrunc);
17777 // Remove merged stores for next iteration.
17778 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17779 NumConsecutiveStores -= NumElem;
17781 return MadeChange;
17784 bool DAGCombiner::tryStoreMergeOfExtracts(
17785 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17786 EVT MemVT, SDNode *RootNode) {
17787 LLVMContext &Context = *DAG.getContext();
17788 const DataLayout &DL = DAG.getDataLayout();
17789 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17790 bool MadeChange = false;
17792 // Loop on Consecutive Stores on success.
17793 while (NumConsecutiveStores >= 2) {
17794 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17795 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17796 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17797 unsigned NumStoresToMerge = 1;
17798 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17799 // Find a legal type for the vector store.
17800 unsigned Elts = (i + 1) * NumMemElts;
17801 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17802 bool IsFast = false;
17804 // Break early when size is too large to be legal.
17805 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17806 break;
17808 if (TLI.isTypeLegal(Ty) &&
17809 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17810 TLI.allowsMemoryAccess(Context, DL, Ty,
17811 *FirstInChain->getMemOperand(), &IsFast) &&
17812 IsFast)
17813 NumStoresToMerge = i + 1;
17816 // Check if we found a legal vector type creating a meaningful
17817 // merge.
17818 if (NumStoresToMerge < 2) {
17819 // We know that candidate stores are in order and of correct
17820 // shape. While there is no mergeable sequence from the
17821 // beginning one may start later in the sequence. The only
17822 // reason a merge of size N could have failed where another of
17823 // the same size would not have, is if the alignment has
17824 // improved. Drop as many candidates as we can here.
17825 unsigned NumSkip = 1;
17826 while ((NumSkip < NumConsecutiveStores) &&
17827 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17828 NumSkip++;
17830 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17831 NumConsecutiveStores -= NumSkip;
17832 continue;
17835 // Check that we can merge these candidates without causing a cycle.
17836 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17837 RootNode)) {
17838 StoreNodes.erase(StoreNodes.begin(),
17839 StoreNodes.begin() + NumStoresToMerge);
17840 NumConsecutiveStores -= NumStoresToMerge;
17841 continue;
17844 MadeChange |= mergeStoresOfConstantsOrVecElts(
17845 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17846 /*UseVector*/ true, /*UseTrunc*/ false);
17848 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17849 NumConsecutiveStores -= NumStoresToMerge;
17851 return MadeChange;
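// Merge consecutive stores whose values come from consecutive loads: the run
// of loads becomes one wide load and the run of stores one wide store (or an
// extending load paired with a truncating store when integers are promoted).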
17854 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17855 unsigned NumConsecutiveStores, EVT MemVT,
17856 SDNode *RootNode, bool AllowVectors,
17857 bool IsNonTemporalStore,
17858 bool IsNonTemporalLoad) {
17859 LLVMContext &Context = *DAG.getContext();
17860 const DataLayout &DL = DAG.getDataLayout();
17861 int64_t ElementSizeBytes = MemVT.getStoreSize();
17862 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17863 bool MadeChange = false;
17865 // Look for load nodes which are used by the stored values.
17866 SmallVector<MemOpLink, 8> LoadNodes;
17868 // Find acceptable loads. Loads need to have the same chain (token factor),
17869 // must not be zext, volatile, or indexed, and they must be consecutive.
17870 BaseIndexOffset LdBasePtr;
17872 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17873 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17874 SDValue Val = peekThroughBitcasts(St->getValue());
17875 LoadSDNode *Ld = cast<LoadSDNode>(Val);
17877 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17878 // If this is not the first ptr that we check.
17879 int64_t LdOffset = 0;
17880 if (LdBasePtr.getBase().getNode()) {
17881 // The base ptr must be the same.
17882 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17883 break;
17884 } else {
17885 // Remember this base pointer; all later loads must have the same base.
17886 LdBasePtr = LdPtr;
17889 // We found a potential memory operand to merge.
17890 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
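// Repeatedly try to merge a prefix of the collected store/load pairs, trimming
// candidates from the front until fewer than two stores or loads remain.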
17893 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17894 Align RequiredAlignment;
17895 bool NeedRotate = false;
17896 if (LoadNodes.size() == 2) {
17897 // If we have load/store pair instructions and we only have two values,
17898 // don't bother merging.
17899 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17900 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17901 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17902 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17903 break;
17905 // If the loads are reversed, see if we can rotate the halves into place.
17906 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17907 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17908 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17909 if (Offset0 - Offset1 == ElementSizeBytes &&
17910 (hasOperation(ISD::ROTL, PairVT) ||
17911 hasOperation(ISD::ROTR, PairVT))) {
17912 std::swap(LoadNodes[0], LoadNodes[1]);
17913 NeedRotate = true;
17916 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17917 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17918 Align FirstStoreAlign = FirstInChain->getAlign();
17919 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17921 // Scan the memory operations on the chain and find the first
17922 // non-consecutive load memory address. These variables hold the index in
17923 // the load node array.
17925 unsigned LastConsecutiveLoad = 1;
17927 // This variable refers to the size and not index in the array.
17928 unsigned LastLegalVectorType = 1;
17929 unsigned LastLegalIntegerType = 1;
17930 bool isDereferenceable = true;
17931 bool DoIntegerTruncate = false;
17932 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17933 SDValue LoadChain = FirstLoad->getChain();
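// Walk the consecutive loads and grow the mergeable run while each additional
// element still gives a legal (and fast) vector or integer store type.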
17934 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17935 // All loads must share the same chain.
17936 if (LoadNodes[i].MemNode->getChain() != LoadChain)
17937 break;
17939 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17940 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17941 break;
17942 LastConsecutiveLoad = i;
17944 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17945 isDereferenceable = false;
17947 // Find a legal type for the vector store.
17948 unsigned Elts = (i + 1) * NumMemElts;
17949 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17951 // Break early when size is too large to be legal.
17952 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17953 break;
17955 bool IsFastSt = false;
17956 bool IsFastLd = false;
17957 // Don't try vector types if we need a rotate. We may still fail the
17958 // legality checks for the integer type, but we can't handle the rotate
17959 // case with vectors.
17960 // FIXME: We could use a shuffle in place of the rotate.
17961 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
17962 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17963 DAG.getMachineFunction()) &&
17964 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17965 *FirstInChain->getMemOperand(), &IsFastSt) &&
17966 IsFastSt &&
17967 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17968 *FirstLoad->getMemOperand(), &IsFastLd) &&
17969 IsFastLd) {
17970 LastLegalVectorType = i + 1;
17973 // Find a legal type for the integer store.
17974 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17975 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17976 if (TLI.isTypeLegal(StoreTy) &&
17977 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17978 DAG.getMachineFunction()) &&
17979 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17980 *FirstInChain->getMemOperand(), &IsFastSt) &&
17981 IsFastSt &&
17982 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17983 *FirstLoad->getMemOperand(), &IsFastLd) &&
17984 IsFastLd) {
17985 LastLegalIntegerType = i + 1;
17986 DoIntegerTruncate = false;
17987 // Or check whether a truncstore and extload is legal.
17988 } else if (TLI.getTypeAction(Context, StoreTy) ==
17989 TargetLowering::TypePromoteInteger) {
17990 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17991 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17992 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17993 DAG.getMachineFunction()) &&
17994 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17995 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17996 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17997 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17998 *FirstInChain->getMemOperand(), &IsFastSt) &&
17999 IsFastSt &&
18000 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18001 *FirstLoad->getMemOperand(), &IsFastLd) &&
18002 IsFastLd) {
18003 LastLegalIntegerType = i + 1;
18004 DoIntegerTruncate = true;
18009 // Only use vector types if the vector type is larger than the integer
18010 // type. If they are the same, use integers.
18011 bool UseVectorTy =
18012 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
18013 unsigned LastLegalType =
18014 std::max(LastLegalVectorType, LastLegalIntegerType);
18016 // We add +1 here because the LastXXX variables refer to an index (a
18017 // location) while NumElem refers to a count (an array size).
18018 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18019 NumElem = std::min(LastLegalType, NumElem);
18020 Align FirstLoadAlign = FirstLoad->getAlign();
18022 if (NumElem < 2) {
18023 // We know that candidate stores are in order and of correct
18024 // shape. While there is no mergeable sequence from the
18025 // beginning, one may start later in the sequence. The only
18026 // reason a merge of size N could have failed where another of
18027 // the same size would not have is if the alignment of either
18028 // the load or store has improved. Drop as many candidates as we
18029 // can here.
18030 unsigned NumSkip = 1;
18031 while ((NumSkip < LoadNodes.size()) &&
18032 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18033 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18034 NumSkip++;
18035 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18036 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18037 NumConsecutiveStores -= NumSkip;
18038 continue;
18041 // Check that we can merge these candidates without causing a cycle.
18042 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18043 RootNode)) {
18044 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18045 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18046 NumConsecutiveStores -= NumElem;
18047 continue;
18050 // Find if it is better to use vectors or integers to load and store
18051 // to memory.
18052 EVT JointMemOpVT;
18053 if (UseVectorTy) {
18054 // Find a legal type for the vector store.
18055 unsigned Elts = NumElem * NumMemElts;
18056 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18057 } else {
18058 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18059 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18062 SDLoc LoadDL(LoadNodes[0].MemNode);
18063 SDLoc StoreDL(StoreNodes[0].MemNode);
18065 // The merged loads are required to have the same incoming chain, so
18066 // using the first's chain is acceptable.
18068 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
18069 AddToWorklist(NewStoreChain.getNode());
18071 MachineMemOperand::Flags LdMMOFlags =
18072 isDereferenceable ? MachineMemOperand::MODereferenceable
18073 : MachineMemOperand::MONone;
18074 if (IsNonTemporalLoad)
18075 LdMMOFlags |= MachineMemOperand::MONonTemporal;
18077 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
18078 ? MachineMemOperand::MONonTemporal
18079 : MachineMemOperand::MONone;
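// Emit the wide load and the wide store. The vector / plain integer path may
// also rotate the loaded value; the promoted-integer path uses an extending
// load paired with a truncating store.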
18081 SDValue NewLoad, NewStore;
18082 if (UseVectorTy || !DoIntegerTruncate) {
18083 NewLoad = DAG.getLoad(
18084 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18085 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18086 SDValue StoreOp = NewLoad;
18087 if (NeedRotate) {
18088 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18089 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
18090 "Unexpected type for rotate-able load pair");
18091 SDValue RotAmt =
18092 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
18093 // Target can convert to the identical ROTR if it does not have ROTL.
18094 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
18096 NewStore = DAG.getStore(
18097 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18098 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18099 } else { // This must be the truncstore/extload case
18100 EVT ExtendedTy =
18101 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
18102 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
18103 FirstLoad->getChain(), FirstLoad->getBasePtr(),
18104 FirstLoad->getPointerInfo(), JointMemOpVT,
18105 FirstLoadAlign, LdMMOFlags);
18106 NewStore = DAG.getTruncStore(
18107 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18108 FirstInChain->getPointerInfo(), JointMemOpVT,
18109 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18112 // Transfer chain users from old loads to the new load.
18113 for (unsigned i = 0; i < NumElem; ++i) {
18114 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18115 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
18116 SDValue(NewLoad.getNode(), 1));
18119 // Replace all stores with the new store. Recursively remove corresponding
18120 // values if they are no longer used.
18121 for (unsigned i = 0; i < NumElem; ++i) {
18122 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18123 CombineTo(StoreNodes[i].MemNode, NewStore);
18124 if (Val.getNode()->use_empty())
18125 recursivelyDeleteUnusedNodes(Val.getNode());
18128 MadeChange = true;
18129 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18130 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18131 NumConsecutiveStores -= NumElem;
18133 return MadeChange;
18136 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18137 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18138 return false;
18140 // TODO: Extend this function to merge stores of scalable vectors.
18141 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18142 // store since we know <vscale x 16 x i8> is exactly twice as large as
18143 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18144 EVT MemVT = St->getMemoryVT();
18145 if (MemVT.isScalableVector())
18146 return false;
18147 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18148 return false;
18150 // This function cannot currently deal with non-byte-sized memory sizes.
18151 int64_t ElementSizeBytes = MemVT.getStoreSize();
18152 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18153 return false;
18155 // Do not bother looking at stored values that are not constants, loads, or
18156 // extracted vector elements.
18157 SDValue StoredVal = peekThroughBitcasts(St->getValue());
18158 const StoreSource StoreSrc = getStoreSource(StoredVal);
18159 if (StoreSrc == StoreSource::Unknown)
18160 return false;
18162 SmallVector<MemOpLink, 8> StoreNodes;
18163 SDNode *RootNode;
18164 // Find potential store merge candidates by searching through the chain sub-DAG.
18165 getStoreMergeCandidates(St, StoreNodes, RootNode);
18167 // Check if there is anything to merge.
18168 if (StoreNodes.size() < 2)
18169 return false;
18171 // Sort the memory operands according to their distance from the
18172 // base pointer.
18173 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18174 return LHS.OffsetFromBase < RHS.OffsetFromBase;
18177 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
18178 Attribute::NoImplicitFloat);
18179 bool IsNonTemporalStore = St->isNonTemporal();
18180 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18181 cast<LoadSDNode>(StoredVal)->isNonTemporal();
18183 // Store merging attempts to merge the lowest stores first. This generally
18184 // works out, since on success the remaining stores are re-checked
18185 // after the first collection of stores is merged. However, in the
18186 // case that a non-mergeable store is found first, e.g., {p[-2],
18187 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
18188 // mergeable cases. To prevent this, we prune such stores from the
18189 // front of StoreNodes here.
18190 bool MadeChange = false;
18191 while (StoreNodes.size() > 1) {
18192 unsigned NumConsecutiveStores =
18193 getConsecutiveStores(StoreNodes, ElementSizeBytes);
18194 // There are no more stores in the list to examine.
18195 if (NumConsecutiveStores == 0)
18196 return MadeChange;
18198 // We have at least 2 consecutive stores. Try to merge them.
18199 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18200 switch (StoreSrc) {
18201 case StoreSource::Constant:
18202 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
18203 MemVT, RootNode, AllowVectors);
18204 break;
18206 case StoreSource::Extract:
18207 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
18208 MemVT, RootNode);
18209 break;
18211 case StoreSource::Load:
18212 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
18213 MemVT, RootNode, AllowVectors,
18214 IsNonTemporalStore, IsNonTemporalLoad);
18215 break;
18217 default:
18218 llvm_unreachable("Unhandled store source type");
18221 return MadeChange;
18224 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18225 SDLoc SL(ST);
18226 SDValue ReplStore;
18228 // Replace the chain to avoid dependency.
18229 if (ST->isTruncatingStore()) {
18230 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18231 ST->getBasePtr(), ST->getMemoryVT(),
18232 ST->getMemOperand());
18233 } else {
18234 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18235 ST->getMemOperand());
18238 // Create token to keep both nodes around.
18239 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18240 MVT::Other, ST->getChain(), ReplStore);
18242 // Make sure the new and old chains are cleaned up.
18243 AddToWorklist(Token.getNode());
18245 // Don't add users to work list.
18246 return CombineTo(ST, Token, false);
18249 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18250 SDValue Value = ST->getValue();
18251 if (Value.getOpcode() == ISD::TargetConstantFP)
18252 return SDValue();
18254 if (!ISD::isNormalStore(ST))
18255 return SDValue();
18257 SDLoc DL(ST);
18259 SDValue Chain = ST->getChain();
18260 SDValue Ptr = ST->getBasePtr();
18262 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18264 // NOTE: If the original store is volatile, this transform must not increase
18265 // the number of stores. For example, on x86-32 an f64 can be stored in one
18266 // processor operation but an i64 (which is not legal) requires two. So the
18267 // transform should not be done in this case.
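// Bitcast the FP constant to integer and store the integer bits instead, when
// such a store is (or will become) legal; f64 may also be split into two i32
// stores.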
18269 SDValue Tmp;
18270 switch (CFP->getSimpleValueType(0).SimpleTy) {
18271 default:
18272 llvm_unreachable("Unknown FP type");
18273 case MVT::f16: // We don't do this for these yet.
18274 case MVT::f80:
18275 case MVT::f128:
18276 case MVT::ppcf128:
18277 return SDValue();
18278 case MVT::f32:
18279 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18280 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18282 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18283 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18284 MVT::i32);
18285 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18288 return SDValue();
18289 case MVT::f64:
18290 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18291 ST->isSimple()) ||
18292 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
18294 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18295 getZExtValue(), SDLoc(CFP), MVT::i64);
18296 return DAG.getStore(Chain, DL, Tmp,
18297 Ptr, ST->getMemOperand());
18300 if (ST->isSimple() &&
18301 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18302 // Many FP stores are not made apparent until after legalize, e.g. for
18303 // argument passing. Since this is so common, custom legalize the
18304 // 64-bit integer store into two 32-bit stores.
18305 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18306 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18307 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18308 if (DAG.getDataLayout().isBigEndian())
18309 std::swap(Lo, Hi);
18311 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18312 AAMDNodes AAInfo = ST->getAAInfo();
18314 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18315 ST->getOriginalAlign(), MMOFlags, AAInfo);
18316 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18317 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18318 ST->getPointerInfo().getWithOffset(4),
18319 ST->getOriginalAlign(), MMOFlags, AAInfo);
18320 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18321 St0, St1);
18324 return SDValue();
18328 SDValue DAGCombiner::visitSTORE(SDNode *N) {
18329 StoreSDNode *ST = cast<StoreSDNode>(N);
18330 SDValue Chain = ST->getChain();
18331 SDValue Value = ST->getValue();
18332 SDValue Ptr = ST->getBasePtr();
18334 // If this is a store of a bit convert, store the input value if the
18335 // resultant store does not need a higher alignment than the original.
18336 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18337 ST->isUnindexed()) {
18338 EVT SVT = Value.getOperand(0).getValueType();
18339 // If the store is volatile, we only want to change the store type if the
18340 // resulting store is legal. Otherwise we might increase the number of
18341 // memory accesses. We don't care if the original type was legal or not
18342 // as we assume software couldn't rely on the number of accesses of an
18343 // illegal type.
18344 // TODO: May be able to relax for unordered atomics (see D66309)
18345 if (((!LegalOperations && ST->isSimple()) ||
18346 TLI.isOperationLegal(ISD::STORE, SVT)) &&
18347 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18348 DAG, *ST->getMemOperand())) {
18349 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18350 ST->getMemOperand());
18354 // Turn 'store undef, Ptr' -> nothing.
18355 if (Value.isUndef() && ST->isUnindexed())
18356 return Chain;
18358 // Try to infer better alignment information than the store already has.
18359 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18360 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18361 if (*Alignment > ST->getAlign() &&
18362 isAligned(*Alignment, ST->getSrcValueOffset())) {
18363 SDValue NewStore =
18364 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18365 ST->getMemoryVT(), *Alignment,
18366 ST->getMemOperand()->getFlags(), ST->getAAInfo());
18367 // NewStore will always be N as we are only refining the alignment
18368 assert(NewStore.getNode() == N);
18369 (void)NewStore;
18374 // Try transforming a pair of floating-point load / store ops to integer
18375 // load / store ops.
18376 if (SDValue NewST = TransformFPLoadStorePair(N))
18377 return NewST;
18379 // Try transforming several stores into STORE (BSWAP).
18380 if (SDValue Store = mergeTruncStores(ST))
18381 return Store;
18383 if (ST->isUnindexed()) {
18384 // Walk up chain skipping non-aliasing memory nodes, on this store and any
18385 // adjacent stores.
18386 if (findBetterNeighborChains(ST)) {
18387 // replaceStoreChain uses CombineTo, which handled all of the worklist
18388 // manipulation. Return the original node to not do anything else.
18389 return SDValue(ST, 0);
18391 Chain = ST->getChain();
18394 // FIXME: is there such a thing as a truncating indexed store?
18395 if (ST->isTruncatingStore() && ST->isUnindexed() &&
18396 Value.getValueType().isInteger() &&
18397 (!isa<ConstantSDNode>(Value) ||
18398 !cast<ConstantSDNode>(Value)->isOpaque())) {
18399 APInt TruncDemandedBits =
18400 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18401 ST->getMemoryVT().getScalarSizeInBits());
18403 // See if we can simplify the input to this truncstore with knowledge that
18404 // only the low bits are being used. For example:
18405 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
18406 AddToWorklist(Value.getNode());
18407 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18408 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18409 ST->getMemOperand());
18411 // Otherwise, see if we can simplify the operation with
18412 // SimplifyDemandedBits, which only works if the value has a single use.
18413 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18414 // Re-visit the store if anything changed and the store hasn't been merged
18415 // with another node (N is deleted) SimplifyDemandedBits will add Value's
18416 // node back to the worklist if necessary, but we also need to re-visit
18417 // the Store node itself.
18418 if (N->getOpcode() != ISD::DELETED_NODE)
18419 AddToWorklist(N);
18420 return SDValue(N, 0);
18424 // If this is a load followed by a store to the same location, then the store
18425 // is dead/noop.
18426 // TODO: Can relax for unordered atomics (see D66309)
18427 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18428 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18429 ST->isUnindexed() && ST->isSimple() &&
18430 Ld->getAddressSpace() == ST->getAddressSpace() &&
18431 // There can't be any side effects between the load and store, such as
18432 // a call or store.
18433 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18434 // The store is dead, remove it.
18435 return Chain;
18439 // TODO: Can relax for unordered atomics (see D66309)
18440 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18441 if (ST->isUnindexed() && ST->isSimple() &&
18442 ST1->isUnindexed() && ST1->isSimple()) {
18443 if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
18444 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
18445 ST->getAddressSpace() == ST1->getAddressSpace()) {
18446 // If this is a store followed by a store with the same value to the
18447 // same location, then the store is dead/noop.
18448 return Chain;
18451 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18452 !ST1->getBasePtr().isUndef() &&
18453 // BaseIndexOffset and the code below requires knowing the size
18454 // of a vector, so bail out if MemoryVT is scalable.
18455 !ST->getMemoryVT().isScalableVector() &&
18456 !ST1->getMemoryVT().isScalableVector() &&
18457 ST->getAddressSpace() == ST1->getAddressSpace()) {
18458 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18459 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18460 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18461 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18462 // If the preceding store writes to a subset of the current store's
18463 // location and no other node is chained to that store, we can
18464 // effectively drop that store. Do not remove stores to undef as they may
18465 // be used as data sinks.
18466 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18467 CombineTo(ST1, ST1->getChain());
18468 return SDValue();
18474 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18475 // truncating store. We can do this even if this is already a truncstore.
18476 if ((Value.getOpcode() == ISD::FP_ROUND ||
18477 Value.getOpcode() == ISD::TRUNCATE) &&
18478 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18479 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18480 ST->getMemoryVT(), LegalOperations)) {
18481 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18482 Ptr, ST->getMemoryVT(), ST->getMemOperand());
18485 // Always perform this optimization before types are legal. If the target
18486 // prefers, also try this after legalization to catch stores that were created
18487 // by intrinsics or other nodes.
18488 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18489 while (true) {
18490 // There can be multiple store sequences on the same chain.
18491 // Keep trying to merge store sequences until we are unable to do so
18492 // or until we merge the last store on the chain.
18493 bool Changed = mergeConsecutiveStores(ST);
18494 if (!Changed) break;
18495 // Return N as merge only uses CombineTo and no worklist clean
18496 // up is necessary.
18497 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18498 return SDValue(N, 0);
18502 // Try transforming N to an indexed store.
18503 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18504 return SDValue(N, 0);
18506 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
18508 // Make sure to do this only after attempting to merge stores in order to
18509 // avoid changing the types of some subset of stores due to visit order,
18510 // preventing their merging.
18511 if (isa<ConstantFPSDNode>(ST->getValue())) {
18512 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18513 return NewSt;
18516 if (SDValue NewSt = splitMergedValStore(ST))
18517 return NewSt;
18519 return ReduceLoadOpStoreWidth(N);
18522 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18523 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18524 if (!LifetimeEnd->hasOffset())
18525 return SDValue();
18527 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18528 LifetimeEnd->getOffset(), false);
18530 // We walk up the chains to find stores.
18531 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18532 while (!Chains.empty()) {
18533 SDValue Chain = Chains.pop_back_val();
18534 if (!Chain.hasOneUse())
18535 continue;
18536 switch (Chain.getOpcode()) {
18537 case ISD::TokenFactor:
18538 for (unsigned Nops = Chain.getNumOperands(); Nops;)
18539 Chains.push_back(Chain.getOperand(--Nops));
18540 break;
18541 case ISD::LIFETIME_START:
18542 case ISD::LIFETIME_END:
18543 // We can forward past any lifetime start/end that can be proven not to
18544 // alias the node.
18545 if (!mayAlias(Chain.getNode(), N))
18546 Chains.push_back(Chain.getOperand(0));
18547 break;
18548 case ISD::STORE: {
18549 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
18550 // TODO: Can relax for unordered atomics (see D66309)
18551 if (!ST->isSimple() || ST->isIndexed())
18552 continue;
18553 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18554 // The bounds of a scalable store are not known until runtime, so this
18555 // store cannot be elided.
18556 if (StoreSize.isScalable())
18557 continue;
18558 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18559 // If we store purely within object bounds just before its lifetime ends,
18560 // we can remove the store.
18561 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18562 StoreSize.getFixedSize() * 8)) {
18563 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18564 dbgs() << "\nwithin LIFETIME_END of : ";
18565 LifetimeEndBase.dump(); dbgs() << "\n");
18566 CombineTo(ST, ST->getChain());
18567 return SDValue(N, 0);
18572 return SDValue();
18575 /// For the store instruction sequence below, the F and I values
18576 /// are bundled together as an i64 value before being stored into memory.
18577 /// Sometimes it is more efficient to generate separate stores for F and I,
18578 /// which can remove the bitwise instructions or sink them to colder places.
18580 /// (store (or (zext (bitcast F to i32) to i64),
18581 /// (shl (zext I to i64), 32)), addr) -->
18582 /// (store F, addr) and (store I, addr+4)
18584 /// Similarly, splitting other merged stores can also be beneficial, for example:
18585 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18586 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18587 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18588 /// For pair of {i16, i8}, i32 store --> two i16 stores.
18589 /// For pair of {i8, i8}, i16 store --> two i8 stores.
18591 /// We allow each target to determine specifically which kind of splitting is
18592 /// supported.
18594 /// These store patterns are commonly seen from the simple code snippet below
18595 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
18596 /// void goo(const std::pair<int, float> &);
18597 /// hoo() {
18598 /// ...
18599 /// goo(std::make_pair(tmp, ftmp));
18600 /// ...
18601 /// }
18603 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18604 if (OptLevel == CodeGenOpt::None)
18605 return SDValue();
18607 // Can't change the number of memory accesses for a volatile store or break
18608 // atomicity for an atomic one.
18609 if (!ST->isSimple())
18610 return SDValue();
18612 SDValue Val = ST->getValue();
18613 SDLoc DL(ST);
18615 // Match OR operand.
18616 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18617 return SDValue();
18620 // Match the SHL operand and get the low and high parts of Val.
18620 SDValue Op1 = Val.getOperand(0);
18621 SDValue Op2 = Val.getOperand(1);
18622 SDValue Lo, Hi;
18623 if (Op1.getOpcode() != ISD::SHL) {
18624 std::swap(Op1, Op2);
18625 if (Op1.getOpcode() != ISD::SHL)
18626 return SDValue();
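// Op1 is now the SHL: its source becomes the high half and Op2 the low half
// of the stored value.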
18628 Lo = Op2;
18629 Hi = Op1.getOperand(0);
18630 if (!Op1.hasOneUse())
18631 return SDValue();
18633 // Match shift amount to HalfValBitSize.
18634 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18635 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18636 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18637 return SDValue();
18639 // Lo and Hi must be zero-extended from integer types no wider than
18640 // half of Val's width.
18641 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18642 !Lo.getOperand(0).getValueType().isScalarInteger() ||
18643 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18644 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18645 !Hi.getOperand(0).getValueType().isScalarInteger() ||
18646 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18647 return SDValue();
18649 // Use the EVTs of the low and high parts before any bitcast as the
18650 // input to the target query.
18651 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18652 ? Lo.getOperand(0).getValueType()
18653 : Lo.getValueType();
18654 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18655 ? Hi.getOperand(0).getValueType()
18656 : Hi.getValueType();
18657 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18658 return SDValue();
18660 // Start to split store.
18661 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18662 AAMDNodes AAInfo = ST->getAAInfo();
18664 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18665 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18666 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18667 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18669 SDValue Chain = ST->getChain();
18670 SDValue Ptr = ST->getBasePtr();
18671 // Lower value store.
18672 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18673 ST->getOriginalAlign(), MMOFlags, AAInfo);
18674 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18675 // Higher value store.
18676 SDValue St1 = DAG.getStore(
18677 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18678 ST->getOriginalAlign(), MMOFlags, AAInfo);
18679 return St1;
18682 /// Convert a disguised subvector insertion into a shuffle:
18683 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18684 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18685 "Expected extract_vector_elt");
18686 SDValue InsertVal = N->getOperand(1);
18687 SDValue Vec = N->getOperand(0);
18689 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18690 // InsIndex)
18691 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
18692 // CONCAT_VECTORS.
18693 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18694 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18695 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18696 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18697 ArrayRef<int> Mask = SVN->getMask();
18699 SDValue X = Vec.getOperand(0);
18700 SDValue Y = Vec.getOperand(1);
18702 // Vec's operand 0 is using indices from 0 to N-1 and
18703 // operand 1 from N to 2N - 1, where N is the number of
18704 // elements in the vectors.
18705 SDValue InsertVal0 = InsertVal.getOperand(0);
18706 int ElementOffset = -1;
18708 // We explore the inputs of the shuffle in order to see if we find the
18709 // source of the extract_vector_elt. If so, we can use it to modify the
18710 // shuffle rather than perform an insert_vector_elt.
18711 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18712 ArgWorkList.emplace_back(Mask.size(), Y);
18713 ArgWorkList.emplace_back(0, X);
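// X is pushed last so it is visited first; shuffle mask indices 0..N-1 refer
// to elements of X and N..2N-1 to elements of Y.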
18715 while (!ArgWorkList.empty()) {
18716 int ArgOffset;
18717 SDValue ArgVal;
18718 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18720 if (ArgVal == InsertVal0) {
18721 ElementOffset = ArgOffset;
18722 break;
18725 // Peek through concat_vectors.
18726 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18727 int CurrentArgOffset =
18728 ArgOffset + ArgVal.getValueType().getVectorNumElements();
18729 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18730 for (SDValue Op : reverse(ArgVal->ops())) {
18731 CurrentArgOffset -= Step;
18732 ArgWorkList.emplace_back(CurrentArgOffset, Op);
18735 // Make sure we went through all the elements and did not screw up index
18736 // computation.
18737 assert(CurrentArgOffset == ArgOffset);
18741 if (ElementOffset != -1) {
18742 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18744 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18745 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18746 assert(NewMask[InsIndex] <
18747 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18748 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18750 SDValue LegalShuffle =
18751 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18752 Y, NewMask, DAG);
18753 if (LegalShuffle)
18754 return LegalShuffle;
18758 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18759 // bitcast(shuffle (bitcast V), (extended X), Mask)
18760 // Note: We do not use an insert_subvector node because that requires a
18761 // legal subvector type.
18762 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18763 !InsertVal.getOperand(0).getValueType().isVector())
18764 return SDValue();
18766 SDValue SubVec = InsertVal.getOperand(0);
18767 SDValue DestVec = N->getOperand(0);
18768 EVT SubVecVT = SubVec.getValueType();
18769 EVT VT = DestVec.getValueType();
18770 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18771 // If the source only has a single vector element, the cost of creating the
18772 // wide vector is likely to exceed the cost of an insert_vector_elt.
18773 if (NumSrcElts == 1)
18774 return SDValue();
18775 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18776 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18778 // Step 1: Create a shuffle mask that implements this insert operation. The
18779 // vector that we are inserting into will be operand 0 of the shuffle, so
18780 // those elements are just 'i'. The inserted subvector is in the first
18781 // positions of operand 1 of the shuffle. Example:
18782 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18783 SmallVector<int, 16> Mask(NumMaskVals);
18784 for (unsigned i = 0; i != NumMaskVals; ++i) {
18785 if (i / NumSrcElts == InsIndex)
18786 Mask[i] = (i % NumSrcElts) + NumMaskVals;
18787 else
18788 Mask[i] = i;
18791 // Bail out if the target cannot handle the shuffle we want to create.
18792 EVT SubVecEltVT = SubVecVT.getVectorElementType();
18793 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18794 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18795 return SDValue();
18797 // Step 2: Create a wide vector from the inserted source vector by appending
18798 // undefined elements. This is the same size as our destination vector.
18799 SDLoc DL(N);
18800 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18801 ConcatOps[0] = SubVec;
18802 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18804 // Step 3: Shuffle in the padded subvector.
18805 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18806 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18807 AddToWorklist(PaddedSubV.getNode());
18808 AddToWorklist(DestVecBC.getNode());
18809 AddToWorklist(Shuf.getNode());
18810 return DAG.getBitcast(VT, Shuf);
18813 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18814 SDValue InVec = N->getOperand(0);
18815 SDValue InVal = N->getOperand(1);
18816 SDValue EltNo = N->getOperand(2);
18817 SDLoc DL(N);
18819 EVT VT = InVec.getValueType();
18820 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18822 // Inserting into an out-of-bounds element is undefined.
18823 if (IndexC && VT.isFixedLengthVector() &&
18824 IndexC->getZExtValue() >= VT.getVectorNumElements())
18825 return DAG.getUNDEF(VT);
18827 // Remove redundant insertions:
18828 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18829 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18830 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18831 return InVec;
18833 if (!IndexC) {
18834 // If this is a variable insert into an undef vector, it might be better to splat:
18835 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18836 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18837 if (VT.isScalableVector())
18838 return DAG.getSplatVector(VT, DL, InVal);
18839 else {
18840 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18841 return DAG.getBuildVector(VT, DL, Ops);
18844 return SDValue();
18847 if (VT.isScalableVector())
18848 return SDValue();
18850 unsigned NumElts = VT.getVectorNumElements();
18852 // We must know which element is being inserted for folds below here.
18853 unsigned Elt = IndexC->getZExtValue();
18854 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18855 return Shuf;
18857 // Canonicalize insert_vector_elt dag nodes.
18858 // Example:
18859 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18860 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18862 // Do this only if the child insert_vector_elt node has one use; also
18863 // do this only if indices are both constants and Idx1 < Idx0.
18864 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18865 && isa<ConstantSDNode>(InVec.getOperand(2))) {
18866 unsigned OtherElt = InVec.getConstantOperandVal(2);
18867 if (Elt < OtherElt) {
18868 // Swap nodes.
18869 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18870 InVec.getOperand(0), InVal, EltNo);
18871 AddToWorklist(NewOp.getNode());
18872 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18873 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18877 // If we can't generate a legal BUILD_VECTOR, exit
18878 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18879 return SDValue();
18881 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18882 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
18883 // vector elements.
18884 SmallVector<SDValue, 8> Ops;
18885 // Do not combine these two vectors if the output vector will not replace
18886 // the input vector.
18887 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18888 Ops.append(InVec.getNode()->op_begin(),
18889 InVec.getNode()->op_end());
18890 } else if (InVec.isUndef()) {
18891 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18892 } else {
18893 return SDValue();
18895 assert(Ops.size() == NumElts && "Unexpected vector size");
18897 // Insert the element
18898 if (Elt < Ops.size()) {
18899 // All the operands of BUILD_VECTOR must have the same type;
18900 // we enforce that here.
18901 EVT OpVT = Ops[0].getValueType();
18902 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18905 // Return the new vector
18906 return DAG.getBuildVector(VT, DL, Ops);
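// Replace an extract_vector_elt of a loaded vector with a narrow scalar load
// of just the requested element.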
18909 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18910 SDValue EltNo,
18911 LoadSDNode *OriginalLoad) {
18912 assert(OriginalLoad->isSimple());
18914 EVT ResultVT = EVE->getValueType(0);
18915 EVT VecEltVT = InVecVT.getVectorElementType();
18917 // If the vector element type is not a whole number of bytes then we are
18918 // unable to correctly compute an address to load only the extracted element
18919 // as a scalar.
18920 if (!VecEltVT.isByteSized())
18921 return SDValue();
18923 ISD::LoadExtType ExtTy =
18924 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
18925 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
18926 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18927 return SDValue();
18929 Align Alignment = OriginalLoad->getAlign();
18930 MachinePointerInfo MPI;
18931 SDLoc DL(EVE);
18932 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18933 int Elt = ConstEltNo->getZExtValue();
18934 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18935 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18936 Alignment = commonAlignment(Alignment, PtrOff);
18937 } else {
18938 // Discard the pointer info except the address space because the memory
18939 // operand can't represent this new access since the offset is variable.
18940 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18941 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
18944 bool IsFast = false;
18945 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
18946 OriginalLoad->getAddressSpace(), Alignment,
18947 OriginalLoad->getMemOperand()->getFlags(),
18948 &IsFast) ||
18949 !IsFast)
18950 return SDValue();
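// Compute the address of the element to load from the vector's base pointer
// and the (possibly variable) element index.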
18952 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
18953 InVecVT, EltNo);
18955 // The replacement we need to do here is a little tricky: we need to
18956 // replace an extractelement of a load with a load.
18957 // Use ReplaceAllUsesOfValuesWith to do the replacement.
18958 // Note that this replacement assumes that the extractelement is the only
18959 // use of the load; that's okay because we don't want to perform this
18960 // transformation in other cases anyway.
18961 SDValue Load;
18962 SDValue Chain;
18963 if (ResultVT.bitsGT(VecEltVT)) {
18964 // If the result type of vextract is wider than the load, then issue an
18965 // extending load instead.
18966 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18967 VecEltVT)
18968 ? ISD::ZEXTLOAD
18969 : ISD::EXTLOAD;
18970 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18971 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18972 Alignment, OriginalLoad->getMemOperand()->getFlags(),
18973 OriginalLoad->getAAInfo());
18974 Chain = Load.getValue(1);
18975 } else {
18976 Load = DAG.getLoad(
18977 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18978 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18979 Chain = Load.getValue(1);
18980 if (ResultVT.bitsLT(VecEltVT))
18981 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18982 else
18983 Load = DAG.getBitcast(ResultVT, Load);
18985 WorklistRemover DeadNodes(*this);
18986 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18987 SDValue To[] = { Load, Chain };
18988 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18989 // Make sure to revisit this node to clean it up; it will usually be dead.
18990 AddToWorklist(EVE);
18991 // Since we're explicitly calling ReplaceAllUses, add the new node to the
18992 // worklist explicitly as well.
18993 AddToWorklistWithUsers(Load.getNode());
18994 ++OpsNarrowed;
18995 return SDValue(EVE, 0);
18998 /// Transform a vector binary operation into a scalar binary operation by moving
18999 /// the math/logic after an extract element of a vector.
19000 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
19001 bool LegalOperations) {
19002 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19003 SDValue Vec = ExtElt->getOperand(0);
19004 SDValue Index = ExtElt->getOperand(1);
19005 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19006 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19007 Vec.getNode()->getNumValues() != 1)
19008 return SDValue();
19010 // Targets may want to avoid this to prevent an expensive register transfer.
19011 if (!TLI.shouldScalarizeBinop(Vec))
19012 return SDValue();
19014 // Extracting an element of a vector constant is constant-folded, so this
19015 // transform is just replacing a vector op with a scalar op while moving the
19016 // extract.
19017 SDValue Op0 = Vec.getOperand(0);
19018 SDValue Op1 = Vec.getOperand(1);
19019 if (isAnyConstantBuildVector(Op0, true) ||
19020 isAnyConstantBuildVector(Op1, true)) {
19021 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19022 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19023 SDLoc DL(ExtElt);
19024 EVT VT = ExtElt->getValueType(0);
19025 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19026 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19027 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19030 return SDValue();
19033 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19034 SDValue VecOp = N->getOperand(0);
19035 SDValue Index = N->getOperand(1);
19036 EVT ScalarVT = N->getValueType(0);
19037 EVT VecVT = VecOp.getValueType();
19038 if (VecOp.isUndef())
19039 return DAG.getUNDEF(ScalarVT);
19041 // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
19043 // This only really matters if the index is non-constant since other combines
19044 // on the constant elements already work.
19045 SDLoc DL(N);
19046 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19047 Index == VecOp.getOperand(2)) {
19048 SDValue Elt = VecOp.getOperand(1);
19049 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19052 // (vextract (scalar_to_vector val), 0) -> val
19053 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19054 // Only 0'th element of SCALAR_TO_VECTOR is defined.
19055 if (DAG.isKnownNeverZero(Index))
19056 return DAG.getUNDEF(ScalarVT);
19058 // Check if the result type doesn't match the inserted element type. A
19059 // SCALAR_TO_VECTOR may truncate the inserted element and the
19060 // EXTRACT_VECTOR_ELT may widen the extracted vector.
19061 SDValue InOp = VecOp.getOperand(0);
19062 if (InOp.getValueType() != ScalarVT) {
19063 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19064 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19066 return InOp;
19069 // extract_vector_elt of out-of-bounds element -> UNDEF
19070 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19071 if (IndexC && VecVT.isFixedLengthVector() &&
19072 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19073 return DAG.getUNDEF(ScalarVT);
19075 // extract_vector_elt (build_vector x, y), 1 -> y
19076 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19077 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19078 TLI.isTypeLegal(VecVT) &&
19079 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19080 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19081 VecVT.isFixedLengthVector()) &&
19082 "BUILD_VECTOR used for scalable vectors");
19083 unsigned IndexVal =
19084 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19085 SDValue Elt = VecOp.getOperand(IndexVal);
19086 EVT InEltVT = Elt.getValueType();
19088 // Sometimes build_vector's scalar input types do not match result type.
19089 if (ScalarVT == InEltVT)
19090 return Elt;
19092 // TODO: It may be useful to truncate, if it is free, when the build_vector
19093 // implicitly converts.
19096 if (VecVT.isScalableVector())
19097 return SDValue();
19099 // All the code from this point onwards assumes fixed width vectors, but it's
19100 // possible that some of the combinations could be made to work for scalable
19101 // vectors too.
19102 unsigned NumElts = VecVT.getVectorNumElements();
19103 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19105 // TODO: These transforms should not require the 'hasOneUse' restriction, but
19106 // there are regressions on multiple targets without it. We can end up with a
19107 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19108 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19109 VecOp.hasOneUse()) {
19110 // The vector index of the LSBs of the source depends on the endianness.
19111 bool IsLE = DAG.getDataLayout().isLittleEndian();
19112 unsigned ExtractIndex = IndexC->getZExtValue();
19113 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19114 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19115 SDValue BCSrc = VecOp.getOperand(0);
19116 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19117 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19119 if (LegalTypes && BCSrc.getValueType().isInteger() &&
19120 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19121 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19122 // trunc i64 X to i32
19123 SDValue X = BCSrc.getOperand(0);
19124 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19125 "Extract element and scalar to vector can't change element type "
19126 "from FP to integer.");
19127 unsigned XBitWidth = X.getValueSizeInBits();
19128 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
19130 // An extract element return value type can be wider than its vector
19131 // operand element type. In that case, the high bits are undefined, so
19132 // it's possible that we may need to extend rather than truncate.
19133 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
19134 assert(XBitWidth % VecEltBitWidth == 0 &&
19135 "Scalar bitwidth must be a multiple of vector element bitwidth");
19136 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19141 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19142 return BO;
19144 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
19145 // We only perform this optimization before the op legalization phase because
19146 // we may introduce new vector instructions which are not backed by TD
19147 // patterns. For example on AVX, extracting elements from a wide vector
19148 // without using extract_subvector. However, if we can find an underlying
19149 // scalar value, then we can always use that.
19150 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19151 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
19152 // Find the new index to extract from.
19153 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19155 // Extracting an undef index is undef.
19156 if (OrigElt == -1)
19157 return DAG.getUNDEF(ScalarVT);
19159 // Select the right vector half to extract from.
19160 SDValue SVInVec;
19161 if (OrigElt < (int)NumElts) {
19162 SVInVec = VecOp.getOperand(0);
19163 } else {
19164 SVInVec = VecOp.getOperand(1);
19165 OrigElt -= NumElts;
19168 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19169 SDValue InOp = SVInVec.getOperand(OrigElt);
19170 if (InOp.getValueType() != ScalarVT) {
19171 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19172 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19175 return InOp;
19178 // FIXME: We should handle recursing on other vector shuffles and
19179 // scalar_to_vector here as well.
19181 if (!LegalOperations ||
19182 // FIXME: Should really be just isOperationLegalOrCustom.
19183 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
19184 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
19185 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
19186 DAG.getVectorIdxConstant(OrigElt, DL));
19190 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19191 // simplify it based on the (valid) extraction indices.
19192 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19193 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19194 Use->getOperand(0) == VecOp &&
19195 isa<ConstantSDNode>(Use->getOperand(1));
19196 })) {
19197 APInt DemandedElts = APInt::getZero(NumElts);
19198 for (SDNode *Use : VecOp->uses()) {
19199 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19200 if (CstElt->getAPIntValue().ult(NumElts))
19201 DemandedElts.setBit(CstElt->getZExtValue());
19203 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
19204 // We simplified the vector operand of this extract element. If this
19205 // extract is not dead, visit it again so it is folded properly.
19206 if (N->getOpcode() != ISD::DELETED_NODE)
19207 AddToWorklist(N);
19208 return SDValue(N, 0);
19210 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
19211 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
19212 // We simplified the vector operand of this extract element. If this
19213 // extract is not dead, visit it again so it is folded properly.
19214 if (N->getOpcode() != ISD::DELETED_NODE)
19215 AddToWorklist(N);
19216 return SDValue(N, 0);
19220 // Everything under here is trying to match an extract of a loaded value.
19221 // If the result of the load has to be truncated, then it's not necessarily
19222 // profitable.
19223 bool BCNumEltsChanged = false;
19224 EVT ExtVT = VecVT.getVectorElementType();
19225 EVT LVT = ExtVT;
19226 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19227 return SDValue();
19229 if (VecOp.getOpcode() == ISD::BITCAST) {
19230 // Don't duplicate a load with other uses.
19231 if (!VecOp.hasOneUse())
19232 return SDValue();
19234 EVT BCVT = VecOp.getOperand(0).getValueType();
19235 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19236 return SDValue();
19237 if (NumElts != BCVT.getVectorNumElements())
19238 BCNumEltsChanged = true;
19239 VecOp = VecOp.getOperand(0);
19240 ExtVT = BCVT.getVectorElementType();
19243 // extract (vector load $addr), i --> load $addr + i * size
19244 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19245 ISD::isNormalLoad(VecOp.getNode()) &&
19246 !Index->hasPredecessor(VecOp.getNode())) {
19247 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
19248 if (VecLoad && VecLoad->isSimple())
19249 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19252 // Perform only after legalization to ensure build_vector / vector_shuffle
19253 // optimizations have already been done.
19254 if (!LegalOperations || !IndexC)
19255 return SDValue();
19257 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19258 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19259 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19260 int Elt = IndexC->getZExtValue();
19261 LoadSDNode *LN0 = nullptr;
19262 if (ISD::isNormalLoad(VecOp.getNode())) {
19263 LN0 = cast<LoadSDNode>(VecOp);
19264 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19265 VecOp.getOperand(0).getValueType() == ExtVT &&
19266 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19267 // Don't duplicate a load with other uses.
19268 if (!VecOp.hasOneUse())
19269 return SDValue();
19271 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19273 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
19274 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19275 // =>
19276 // (load $addr+1*size)
19278 // Don't duplicate a load with other uses.
19279 if (!VecOp.hasOneUse())
19280 return SDValue();
19282 // If the bit convert changed the number of elements, it is unsafe
19283 // to examine the mask.
19284 if (BCNumEltsChanged)
19285 return SDValue();
19287 // Select the input vector, guarding against an out-of-range extract index.
19288 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19289 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19291 if (VecOp.getOpcode() == ISD::BITCAST) {
19292 // Don't duplicate a load with other uses.
19293 if (!VecOp.hasOneUse())
19294 return SDValue();
19296 VecOp = VecOp.getOperand(0);
19298 if (ISD::isNormalLoad(VecOp.getNode())) {
19299 LN0 = cast<LoadSDNode>(VecOp);
19300 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19301 Index = DAG.getConstant(Elt, DL, Index.getValueType());
19303 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19304 VecVT.getVectorElementType() == ScalarVT &&
19305 (!LegalTypes ||
19306 TLI.isTypeLegal(
19307 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19308 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19309 // -> extract_vector_elt a, 0
19310 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19311 // -> extract_vector_elt a, 1
19312 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19313 // -> extract_vector_elt b, 0
19314 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19315 // -> extract_vector_elt b, 1
19316 SDLoc SL(N);
19317 EVT ConcatVT = VecOp.getOperand(0).getValueType();
19318 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19319 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19320 Index.getValueType());
19322 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19323 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19324 ConcatVT.getVectorElementType(),
19325 ConcatOp, NewIdx);
19326 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19329 // Make sure we found a non-volatile load and the extractelement is
19330 // the only use.
19331 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19332 return SDValue();
19334 // If Idx was -1 above, Elt is going to be -1, so just return undef.
19335 if (Elt == -1)
19336 return DAG.getUNDEF(LVT);
19338 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19341 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
19342 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19343 // We perform this optimization post type-legalization because
19344 // the type-legalizer often scalarizes integer-promoted vectors.
19345 // Performing this optimization earlier may create bit-casts which
19346 // will be type-legalized to complex code sequences.
19347 // We perform this optimization only before the operation legalizer because we
19348 // may introduce illegal operations.
19349 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19350 return SDValue();
19352 unsigned NumInScalars = N->getNumOperands();
19353 SDLoc DL(N);
19354 EVT VT = N->getValueType(0);
19356 // Check to see if this is a BUILD_VECTOR of a bunch of values
19357 // which come from any_extend or zero_extend nodes. If so, we can create
19358 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19359 // optimizations. We do not handle sign-extend because we can't fill the sign
19360 // bits using shuffles.
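// Illustrative example (editor's sketch; little-endian, i16 zero-extended to
// i32, types chosen only for illustration):
//   (v4i32 build_vector (zext a), (zext b), (zext c), (zext d))
// is rebuilt as
//   (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))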
19361 EVT SourceType = MVT::Other;
19362 bool AllAnyExt = true;
19364 for (unsigned i = 0; i != NumInScalars; ++i) {
19365 SDValue In = N->getOperand(i);
19366 // Ignore undef inputs.
19367 if (In.isUndef()) continue;
19369 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
19370 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19372 // Abort if the element is not an extension.
19373 if (!ZeroExt && !AnyExt) {
19374 SourceType = MVT::Other;
19375 break;
19378 // The input is a ZeroExt or AnyExt. Check the original type.
19379 EVT InTy = In.getOperand(0).getValueType();
19381 // Check that all of the widened source types are the same.
19382 if (SourceType == MVT::Other)
19383 // First time.
19384 SourceType = InTy;
19385 else if (InTy != SourceType) {
19386 // Multiple incoming types. Abort.
19387 SourceType = MVT::Other;
19388 break;
19391 // Check if all of the extends are ANY_EXTENDs.
19392 AllAnyExt &= AnyExt;
19395 // In order to have valid types, all of the inputs must be extended from the
19396 // same source type and all of the inputs must be any or zero extend.
19397 // Scalar sizes must be a power of two.
19398 EVT OutScalarTy = VT.getScalarType();
19399 bool ValidTypes = SourceType != MVT::Other &&
19400 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19401 isPowerOf2_32(SourceType.getSizeInBits());
19403 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19404 // turn into a single shuffle instruction.
19405 if (!ValidTypes)
19406 return SDValue();
19408 // If we already have a splat buildvector, then don't fold it if it means
19409 // introducing zeros.
19410 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19411 return SDValue();
19413 bool isLE = DAG.getDataLayout().isLittleEndian();
19414 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19415 assert(ElemRatio > 1 && "Invalid element size ratio");
19416 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19417 DAG.getConstant(0, DL, SourceType);
19419 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19420 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19422 // Populate the new build_vector
19423 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19424 SDValue Cast = N->getOperand(i);
19425 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19426 Cast.getOpcode() == ISD::ZERO_EXTEND ||
19427 Cast.isUndef()) && "Invalid cast opcode");
19428 SDValue In;
19429 if (Cast.isUndef())
19430 In = DAG.getUNDEF(SourceType);
19431 else
19432 In = Cast->getOperand(0);
19433 unsigned Index = isLE ? (i * ElemRatio) :
19434 (i * ElemRatio + (ElemRatio - 1));
19436 assert(Index < Ops.size() && "Invalid index");
19437 Ops[Index] = In;
19440 // The type of the new BUILD_VECTOR node.
19441 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19442 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19443 "Invalid vector size");
19444 // Check if the new vector type is legal.
19445 if (!isTypeLegal(VecVT) ||
19446 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19447 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19448 return SDValue();
19450 // Make the new BUILD_VECTOR.
19451 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19453 // The new BUILD_VECTOR node has the potential to be further optimized.
19454 AddToWorklist(BV.getNode());
19455 // Bitcast to the desired type.
19456 return DAG.getBitcast(VT, BV);
19459 // Simplify (build_vec (trunc $1)
19460 // (trunc (srl $1 half-width))
19461 // (trunc (srl $1 (2 * half-width))) …)
19462 // to (bitcast $1)
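// Concretely (editor's sketch; $1 : i64, little-endian):
//   (v4i16 build_vector (trunc $1), (trunc (srl $1, 16)),
//                       (trunc (srl $1, 32)), (trunc (srl $1, 48)))
// becomes (v4i16 bitcast $1).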
19463 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19464 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19466 // Only for little-endian targets
19467 if (!DAG.getDataLayout().isLittleEndian())
19468 return SDValue();
19470 SDLoc DL(N);
19471 EVT VT = N->getValueType(0);
19472 EVT OutScalarTy = VT.getScalarType();
19473 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19475 // Only handle power-of-two scalar types to be sure that the bitcast works well
19476 if (!isPowerOf2_64(ScalarTypeBitsize))
19477 return SDValue();
19479 unsigned NumInScalars = N->getNumOperands();
19481 // Look through bitcasts
19482 auto PeekThroughBitcast = [](SDValue Op) {
19483 if (Op.getOpcode() == ISD::BITCAST)
19484 return Op.getOperand(0);
19485 return Op;
19488 // The source value where all the parts are extracted.
19489 SDValue Src;
19490 for (unsigned i = 0; i != NumInScalars; ++i) {
19491 SDValue In = PeekThroughBitcast(N->getOperand(i));
19492 // Ignore undef inputs.
19493 if (In.isUndef()) continue;
19495 if (In.getOpcode() != ISD::TRUNCATE)
19496 return SDValue();
19498 In = PeekThroughBitcast(In.getOperand(0));
19500 if (In.getOpcode() != ISD::SRL) {
19501 // For now, handle only build_vec without shuffling; shifts may be handled
19502 // here in the future.
19503 if (i != 0)
19504 return SDValue();
19506 Src = In;
19507 } else {
19508 // In is SRL
19509 SDValue part = PeekThroughBitcast(In.getOperand(0));
19511 if (!Src) {
19512 Src = part;
19513 } else if (Src != part) {
19514 // Vector parts do not stem from the same variable
19515 return SDValue();
19518 SDValue ShiftAmtVal = In.getOperand(1);
19519 if (!isa<ConstantSDNode>(ShiftAmtVal))
19520 return SDValue();
19522 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19524 // The extracted value is not extracted at the right position
19525 if (ShiftAmt != i * ScalarTypeBitsize)
19526 return SDValue();
19530 // Only cast if the size is the same
19531 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19532 return SDValue();
19534 return DAG.getBitcast(VT, Src);
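// Editor's note (sketch of the contract, inferred from the code below and its
// caller): given the BUILD_VECTOR node N and the element-to-source mapping in
// VectorMask, try to form a single VECTOR_SHUFFLE of VecIn1/VecIn2 (the input
// pair selected by LeftIdx), concatenating, splitting or padding the inputs as
// needed so both shuffle operands end up with a matching type.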
19537 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19538 ArrayRef<int> VectorMask,
19539 SDValue VecIn1, SDValue VecIn2,
19540 unsigned LeftIdx, bool DidSplitVec) {
19541 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19543 EVT VT = N->getValueType(0);
19544 EVT InVT1 = VecIn1.getValueType();
19545 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19547 unsigned NumElems = VT.getVectorNumElements();
19548 unsigned ShuffleNumElems = NumElems;
19550 // If we artificially split a vector in two already, then the offsets in the
19551 // operands will all be based off of VecIn1, even those in VecIn2.
19552 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19554 uint64_t VTSize = VT.getFixedSizeInBits();
19555 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19556 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19558 assert(InVT2Size <= InVT1Size &&
19559 "Inputs must be sorted to be in non-increasing vector size order.");
19561 // We can't generate a shuffle node with mismatched input and output types.
19562 // Try to make the types match the type of the output.
19563 if (InVT1 != VT || InVT2 != VT) {
19564 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19565 // If the output vector length is a multiple of both input lengths,
19566 // we can concatenate them and pad the rest with undefs.
19567 unsigned NumConcats = VTSize / InVT1Size;
19568 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19569 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19570 ConcatOps[0] = VecIn1;
19571 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19572 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19573 VecIn2 = SDValue();
19574 } else if (InVT1Size == VTSize * 2) {
19575 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19576 return SDValue();
19578 if (!VecIn2.getNode()) {
19579 // If we only have one input vector, and it's twice the size of the
19580 // output, split it in two.
19581 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19582 DAG.getVectorIdxConstant(NumElems, DL));
19583 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19584 // Since we now have shorter input vectors, adjust the offset of the
19585 // second vector's start.
19586 Vec2Offset = NumElems;
19587 } else {
19588 assert(InVT2Size <= InVT1Size &&
19589 "Second input is not going to be larger than the first one.");
19591 // VecIn1 is wider than the output, and we have another, possibly
19592 // smaller input. Pad the smaller input with undefs, shuffle at the
19593 // input vector width, and extract the output.
19594 // The shuffle type is different than VT, so check legality again.
19595 if (LegalOperations &&
19596 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19597 return SDValue();
19599 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19600 // lower it back into a BUILD_VECTOR. So if the inserted type is
19601 // illegal, don't even try.
19602 if (InVT1 != InVT2) {
19603 if (!TLI.isTypeLegal(InVT2))
19604 return SDValue();
19605 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19606 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19608 ShuffleNumElems = NumElems * 2;
19610 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19611 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19612 ConcatOps[0] = VecIn2;
19613 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19614 } else {
19615 // TODO: Support cases where the length mismatch isn't exactly by a
19616 // factor of 2.
19617 // TODO: Move this check upwards, so that if we have bad type
19618 // mismatches, we don't create any DAG nodes.
19619 return SDValue();
19623 // Initialize mask to undef.
19624 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19626 // Only need to run up to the number of elements actually used, not the
19627 // total number of elements in the shuffle - if we are shuffling a wider
19628 // vector, the high lanes should be set to undef.
19629 for (unsigned i = 0; i != NumElems; ++i) {
19630 if (VectorMask[i] <= 0)
19631 continue;
19633 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19634 if (VectorMask[i] == (int)LeftIdx) {
19635 Mask[i] = ExtIndex;
19636 } else if (VectorMask[i] == (int)LeftIdx + 1) {
19637 Mask[i] = Vec2Offset + ExtIndex;
19641 // The type of the input vectors may have changed above.
19642 InVT1 = VecIn1.getValueType();
19644 // If we already have a VecIn2, it should have the same type as VecIn1.
19645 // If we don't, get an undef/zero vector of the appropriate type.
19646 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19647 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19649 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19650 if (ShuffleNumElems > NumElems)
19651 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19653 return Shuffle;
19656 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19657 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19659 // First, determine where the build vector is not undef.
19660 // TODO: We could extend this to handle zero elements as well as undefs.
19661 int NumBVOps = BV->getNumOperands();
19662 int ZextElt = -1;
19663 for (int i = 0; i != NumBVOps; ++i) {
19664 SDValue Op = BV->getOperand(i);
19665 if (Op.isUndef())
19666 continue;
19667 if (ZextElt == -1)
19668 ZextElt = i;
19669 else
19670 return SDValue();
19672 // Bail out if there's no non-undef element.
19673 if (ZextElt == -1)
19674 return SDValue();
19676 // The build vector contains some number of undef elements and exactly
19677 // one other element. That other element must be a zero-extended scalar
19678 // extracted from a vector at a constant index to turn this into a shuffle.
19679 // Also, require that the build vector does not implicitly truncate/extend
19680 // its elements.
19681 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
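// Illustrative example (editor's sketch; V and the types are only for
// illustration):
//   (v2i64 build_vector undef, (i64 zero_extend (extractelt V:v4i32, 1)))
// can become
//   (v2i64 bitcast (vector_shuffle<u,u,1,4> V, (v4i32 zero)))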
19682 EVT VT = BV->getValueType(0);
19683 SDValue Zext = BV->getOperand(ZextElt);
19684 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19685 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19686 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19687 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19688 return SDValue();
19690 // The zero-extended size must be a multiple of the source size, and we must be
19691 // building a vector of the same size as the source of the extract element.
19692 SDValue Extract = Zext.getOperand(0);
19693 unsigned DestSize = Zext.getValueSizeInBits();
19694 unsigned SrcSize = Extract.getValueSizeInBits();
19695 if (DestSize % SrcSize != 0 ||
19696 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19697 return SDValue();
19699 // Create a shuffle mask that will combine the extracted element with zeros
19700 // and undefs.
19701 int ZextRatio = DestSize / SrcSize;
19702 int NumMaskElts = NumBVOps * ZextRatio;
19703 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19704 for (int i = 0; i != NumMaskElts; ++i) {
19705 if (i / ZextRatio == ZextElt) {
19706 // The low bits of the (potentially translated) extracted element map to
19707 // the source vector. The high bits map to zero. We will use a zero vector
19708 // as the 2nd source operand of the shuffle, so use the 1st element of
19709 // that vector (mask value is number-of-elements) for the high bits.
19710 if (i % ZextRatio == 0)
19711 ShufMask[i] = Extract.getConstantOperandVal(1);
19712 else
19713 ShufMask[i] = NumMaskElts;
19716 // Undef elements of the build vector remain undef because we initialize
19717 // the shuffle mask with -1.
19720 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19721 // bitcast (shuffle V, ZeroVec, VectorMask)
19722 SDLoc DL(BV);
19723 EVT VecVT = Extract.getOperand(0).getValueType();
19724 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19725 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19726 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19727 ZeroVec, ShufMask, DAG);
19728 if (!Shuf)
19729 return SDValue();
19730 return DAG.getBitcast(VT, Shuf);
19733 // FIXME: promote to STLExtras.
19734 template <typename R, typename T>
19735 static auto getFirstIndexOf(R &&Range, const T &Val) {
19736 auto I = find(Range, Val);
19737 if (I == Range.end())
19738 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
19739 return std::distance(Range.begin(), I);
19742 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19743 // operations. If the types of the vectors we're extracting from allow it,
19744 // turn this into a vector_shuffle node.
19745 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19746 SDLoc DL(N);
19747 EVT VT = N->getValueType(0);
19749 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19750 if (!isTypeLegal(VT))
19751 return SDValue();
19753 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19754 return V;
19756 // May only combine to shuffle after legalize if shuffle is legal.
19757 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19758 return SDValue();
19760 bool UsesZeroVector = false;
19761 unsigned NumElems = N->getNumOperands();
19763 // Record, for each element of the newly built vector, which input vector
19764 // that element comes from. -1 stands for undef, 0 for the zero vector,
19765 // and positive values for the input vectors.
19766 // VectorMask maps each element to its vector number, and VecIn maps vector
19767 // numbers to their initial SDValues.
19769 SmallVector<int, 8> VectorMask(NumElems, -1);
19770 SmallVector<SDValue, 8> VecIn;
19771 VecIn.push_back(SDValue());
19773 for (unsigned i = 0; i != NumElems; ++i) {
19774 SDValue Op = N->getOperand(i);
19776 if (Op.isUndef())
19777 continue;
19779 // See if we can use a blend with a zero vector.
19780 // TODO: Should we generalize this to a blend with an arbitrary constant
19781 // vector?
19782 if (isNullConstant(Op) || isNullFPConstant(Op)) {
19783 UsesZeroVector = true;
19784 VectorMask[i] = 0;
19785 continue;
19788 // Not an undef or zero. If the input is something other than an
19789 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19790 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19791 !isa<ConstantSDNode>(Op.getOperand(1)))
19792 return SDValue();
19793 SDValue ExtractedFromVec = Op.getOperand(0);
19795 if (ExtractedFromVec.getValueType().isScalableVector())
19796 return SDValue();
19798 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19799 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19800 return SDValue();
19802 // All inputs must have the same element type as the output.
19803 if (VT.getVectorElementType() !=
19804 ExtractedFromVec.getValueType().getVectorElementType())
19805 return SDValue();
19807 // Have we seen this input vector before?
19808 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19809 // a map back from SDValues to numbers isn't worth it.
19810 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19811 if (Idx == -1) { // A new source vector?
19812 Idx = VecIn.size();
19813 VecIn.push_back(ExtractedFromVec);
19816 VectorMask[i] = Idx;
19819 // If we didn't find at least one input vector, bail out.
19820 if (VecIn.size() < 2)
19821 return SDValue();
19823 // If all the operands of the BUILD_VECTOR extract from the same
19824 // vector, then split the vector efficiently based on the maximum
19825 // vector access index and adjust the VectorMask and
19826 // VecIn accordingly.
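// Illustrative example (editor's sketch, assuming v8i32 is legal on the
// target): a v4i32 build_vector that extracts elements {0, 1, 8, 9} from a
// single v16i32 source has MaxIndex = 9, so NearestPow2 = 16 and
// SplitSize = 8; the source is split into two v8i32 halves and the mask is
// remapped onto them, which is cheaper than shuffling the full v16i32 vector.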
19827 bool DidSplitVec = false;
19828 if (VecIn.size() == 2) {
19829 unsigned MaxIndex = 0;
19830 unsigned NearestPow2 = 0;
19831 SDValue Vec = VecIn.back();
19832 EVT InVT = Vec.getValueType();
19833 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19835 for (unsigned i = 0; i < NumElems; i++) {
19836 if (VectorMask[i] <= 0)
19837 continue;
19838 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19839 IndexVec[i] = Index;
19840 MaxIndex = std::max(MaxIndex, Index);
19843 NearestPow2 = PowerOf2Ceil(MaxIndex);
19844 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19845 NumElems * 2 < NearestPow2) {
19846 unsigned SplitSize = NearestPow2 / 2;
19847 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19848 InVT.getVectorElementType(), SplitSize);
19849 if (TLI.isTypeLegal(SplitVT) &&
19850 SplitSize + SplitVT.getVectorNumElements() <=
19851 InVT.getVectorNumElements()) {
19852 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19853 DAG.getVectorIdxConstant(SplitSize, DL));
19854 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19855 DAG.getVectorIdxConstant(0, DL));
19856 VecIn.pop_back();
19857 VecIn.push_back(VecIn1);
19858 VecIn.push_back(VecIn2);
19859 DidSplitVec = true;
19861 for (unsigned i = 0; i < NumElems; i++) {
19862 if (VectorMask[i] <= 0)
19863 continue;
19864 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19870 // Sort input vectors by decreasing vector element count,
19871 // while preserving the relative order of equally-sized vectors.
19872 // Note that we keep the first "implicit" zero vector as-is.
19873 SmallVector<SDValue, 8> SortedVecIn(VecIn);
19874 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
19875 [](const SDValue &a, const SDValue &b) {
19876 return a.getValueType().getVectorNumElements() >
19877 b.getValueType().getVectorNumElements();
19880 // We now also need to rebuild the VectorMask, because it referenced element
19881 // order in VecIn, and we just sorted them.
19882 for (int &SourceVectorIndex : VectorMask) {
19883 if (SourceVectorIndex <= 0)
19884 continue;
19885 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
19886 assert(Idx > 0 && Idx < SortedVecIn.size() &&
19887 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
19888 SourceVectorIndex = Idx;
19891 VecIn = std::move(SortedVecIn);
19893 // TODO: Should this fire if some of the input vectors have an illegal type (like
19894 // it does now), or should we let legalization run its course first?
19896 // Shuffle phase:
19897 // Take pairs of vectors, and shuffle them so that the result has elements
19898 // from these vectors in the correct places.
19899 // For example, given:
19900 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19901 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19902 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19903 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19904 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19905 // We will generate:
19906 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19907 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19908 SmallVector<SDValue, 4> Shuffles;
19909 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19910 unsigned LeftIdx = 2 * In + 1;
19911 SDValue VecLeft = VecIn[LeftIdx];
19912 SDValue VecRight =
19913 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19915 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19916 VecRight, LeftIdx, DidSplitVec))
19917 Shuffles.push_back(Shuffle);
19918 else
19919 return SDValue();
19922 // If we need the zero vector as an "ingredient" in the blend tree, add it
19923 // to the list of shuffles.
19924 if (UsesZeroVector)
19925 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19926 : DAG.getConstantFP(0.0, DL, VT));
19928 // If we only have one shuffle, we're done.
19929 if (Shuffles.size() == 1)
19930 return Shuffles[0];
19932 // Update the vector mask to point to the post-shuffle vectors.
19933 for (int &Vec : VectorMask)
19934 if (Vec == 0)
19935 Vec = Shuffles.size() - 1;
19936 else
19937 Vec = (Vec - 1) / 2;
19939 // More than one shuffle. Generate a binary tree of blends, e.g. if from
19940 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19941 // generate:
19942 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19943 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19944 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19945 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19946 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19947 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19948 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19950 // Make sure the initial size of the shuffle list is even.
19951 if (Shuffles.size() % 2)
19952 Shuffles.push_back(DAG.getUNDEF(VT));
19954 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19955 if (CurSize % 2) {
19956 Shuffles[CurSize] = DAG.getUNDEF(VT);
19957 CurSize++;
19959 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19960 int Left = 2 * In;
19961 int Right = 2 * In + 1;
19962 SmallVector<int, 8> Mask(NumElems, -1);
19963 for (unsigned i = 0; i != NumElems; ++i) {
19964 if (VectorMask[i] == Left) {
19965 Mask[i] = i;
19966 VectorMask[i] = In;
19967 } else if (VectorMask[i] == Right) {
19968 Mask[i] = i + NumElems;
19969 VectorMask[i] = In;
19973 Shuffles[In] =
19974 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19977 return Shuffles[0];
19980 // Try to turn a build vector of zero extends of extract vector elts into a
19981 // vector zero extend and possibly an extract subvector.
19982 // TODO: Support sign extend?
19983 // TODO: Allow undef elements?
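// Illustrative example (editor's sketch; X and the types are only for
// illustration):
//   (v2i32 build_vector (zext (extractelt X:v4i16, 2)),
//                       (zext (extractelt X:v4i16, 3)))
//   --> (v2i32 zero_extend (v2i16 extract_subvector X, 2))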
19984 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19985 if (LegalOperations)
19986 return SDValue();
19988 EVT VT = N->getValueType(0);
19990 bool FoundZeroExtend = false;
19991 SDValue Op0 = N->getOperand(0);
19992 auto checkElem = [&](SDValue Op) -> int64_t {
19993 unsigned Opc = Op.getOpcode();
19994 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19995 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19996 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19997 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19998 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19999 return C->getZExtValue();
20000 return -1;
20003 // Make sure the first element matches
20004 // (zext (extract_vector_elt X, C))
20005 // Offset must be a constant multiple of the
20006 // known-minimum vector length of the result type.
20007 int64_t Offset = checkElem(Op0);
20008 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
20009 return SDValue();
20011 unsigned NumElems = N->getNumOperands();
20012 SDValue In = Op0.getOperand(0).getOperand(0);
20013 EVT InSVT = In.getValueType().getScalarType();
20014 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
20016 // Don't create an illegal input type after type legalization.
20017 if (LegalTypes && !TLI.isTypeLegal(InVT))
20018 return SDValue();
20020 // Ensure all the elements come from the same vector and are adjacent.
20021 for (unsigned i = 1; i != NumElems; ++i) {
20022 if ((Offset + i) != checkElem(N->getOperand(i)))
20023 return SDValue();
20026 SDLoc DL(N);
20027 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
20028 Op0.getOperand(0).getOperand(1));
20029 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
20030 VT, In);
20033 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20034 EVT VT = N->getValueType(0);
20036 // A vector built entirely of undefs is undef.
20037 if (ISD::allOperandsUndef(N))
20038 return DAG.getUNDEF(VT);
20040 // If this is a splat of a bitcast from another vector, change to a
20041 // concat_vector.
20042 // For example:
20043 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20044 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20046 // If X is a build_vector itself, the concat can become a larger build_vector.
20047 // TODO: Maybe this is useful for non-splat too?
20048 if (!LegalOperations) {
20049 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20050 Splat = peekThroughBitcasts(Splat);
20051 EVT SrcVT = Splat.getValueType();
20052 if (SrcVT.isVector()) {
20053 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20054 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
20055 SrcVT.getVectorElementType(), NumElts);
20056 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20057 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20058 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
20059 NewVT, Ops);
20060 return DAG.getBitcast(VT, Concat);
20066 // Check if we can express the BUILD_VECTOR via a subvector extract.
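// For example (editor's sketch; X and the types are only for illustration):
//   (v2i32 build_vector (extractelt X:v8i32, 4), (extractelt X:v8i32, 5))
//   --> (v2i32 extract_subvector X, 4)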
20067 if (!LegalTypes && (N->getNumOperands() > 1)) {
20068 SDValue Op0 = N->getOperand(0);
20069 auto checkElem = [&](SDValue Op) -> uint64_t {
20070 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
20071 (Op0.getOperand(0) == Op.getOperand(0)))
20072 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
20073 return CNode->getZExtValue();
20074 return -1;
20077 int Offset = checkElem(Op0);
20078 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20079 if (Offset + i != checkElem(N->getOperand(i))) {
20080 Offset = -1;
20081 break;
20085 if ((Offset == 0) &&
20086 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20087 return Op0.getOperand(0);
20088 if ((Offset != -1) &&
20089 ((Offset % N->getValueType(0).getVectorNumElements()) ==
20090 0)) // IDX must be a multiple of the output size.
20091 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20092 Op0.getOperand(0), Op0.getOperand(1));
20095 if (SDValue V = convertBuildVecZextToZext(N))
20096 return V;
20098 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
20099 return V;
20101 if (SDValue V = reduceBuildVecTruncToBitCast(N))
20102 return V;
20104 if (SDValue V = reduceBuildVecToShuffle(N))
20105 return V;
20107 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20108 // Do this late as some of the above may replace the splat.
20109 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
20110 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20111 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20112 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
20115 return SDValue();
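// Fold a CONCAT_VECTORS whose operands are bitcasts of scalars (or undef) into
// one BUILD_VECTOR of the scalars, bitcast to the result type. Illustrative
// example (editor's sketch; it assumes the v2i32 operand type is not legal on
// the target, otherwise the fold below bails out early):
//   (v4i32 concat_vectors (v2i32 bitcast a:i64), (v2i32 bitcast b:i64))
//   --> (v4i32 bitcast (v2i64 build_vector a, b))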
20118 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
20119 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20120 EVT OpVT = N->getOperand(0).getValueType();
20122 // If the operands are legal vectors, leave them alone.
20123 if (TLI.isTypeLegal(OpVT))
20124 return SDValue();
20126 SDLoc DL(N);
20127 EVT VT = N->getValueType(0);
20128 SmallVector<SDValue, 8> Ops;
20130 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20131 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20133 // Keep track of what we encounter.
20134 bool AnyInteger = false;
20135 bool AnyFP = false;
20136 for (const SDValue &Op : N->ops()) {
20137 if (ISD::BITCAST == Op.getOpcode() &&
20138 !Op.getOperand(0).getValueType().isVector())
20139 Ops.push_back(Op.getOperand(0));
20140 else if (ISD::UNDEF == Op.getOpcode())
20141 Ops.push_back(ScalarUndef);
20142 else
20143 return SDValue();
20145 // Note whether we encounter an integer or floating point scalar.
20146 // If it's neither, bail out, it could be something weird like x86mmx.
20147 EVT LastOpVT = Ops.back().getValueType();
20148 if (LastOpVT.isFloatingPoint())
20149 AnyFP = true;
20150 else if (LastOpVT.isInteger())
20151 AnyInteger = true;
20152 else
20153 return SDValue();
20156 // If any of the operands is a floating point scalar bitcast to a vector,
20157 // use floating point types throughout, and bitcast everything.
20158 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
20159 if (AnyFP) {
20160 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
20161 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20162 if (AnyInteger) {
20163 for (SDValue &Op : Ops) {
20164 if (Op.getValueType() == SVT)
20165 continue;
20166 if (Op.isUndef())
20167 Op = ScalarUndef;
20168 else
20169 Op = DAG.getBitcast(SVT, Op);
20174 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20175 VT.getSizeInBits() / SVT.getSizeInBits());
20176 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20179 // Attempt to merge nested concat_vectors/undefs.
20180 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20181 // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
20182 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
20183 SelectionDAG &DAG) {
20184 EVT VT = N->getValueType(0);
20186 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20187 EVT SubVT;
20188 SDValue FirstConcat;
20189 for (const SDValue &Op : N->ops()) {
20190 if (Op.isUndef())
20191 continue;
20192 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20193 return SDValue();
20194 if (!FirstConcat) {
20195 SubVT = Op.getOperand(0).getValueType();
20196 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
20197 return SDValue();
20198 FirstConcat = Op;
20199 continue;
20201 if (SubVT != Op.getOperand(0).getValueType())
20202 return SDValue();
20204 assert(FirstConcat && "Concat of all-undefs found");
20206 SmallVector<SDValue> ConcatOps;
20207 for (const SDValue &Op : N->ops()) {
20208 if (Op.isUndef()) {
20209 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20210 continue;
20212 ConcatOps.append(Op->op_begin(), Op->op_end());
20214 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20217 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20218 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20219 // most two distinct vectors the same size as the result, attempt to turn this
20220 // into a legal shuffle.
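// Illustrative example (editor's sketch; A, B and the types are only for
// illustration):
//   (v4i32 concat_vectors (v2i32 extract_subvector A:v4i32, 2),
//                         (v2i32 extract_subvector B:v4i32, 0))
//   --> (vector_shuffle<2,3,4,5> A, B), provided such a shuffle is legal.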
20221 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
20222 EVT VT = N->getValueType(0);
20223 EVT OpVT = N->getOperand(0).getValueType();
20225 // We currently can't generate an appropriate shuffle for a scalable vector.
20226 if (VT.isScalableVector())
20227 return SDValue();
20229 int NumElts = VT.getVectorNumElements();
20230 int NumOpElts = OpVT.getVectorNumElements();
20232 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20233 SmallVector<int, 8> Mask;
20235 for (SDValue Op : N->ops()) {
20236 Op = peekThroughBitcasts(Op);
20238 // UNDEF nodes convert to UNDEF shuffle mask values.
20239 if (Op.isUndef()) {
20240 Mask.append((unsigned)NumOpElts, -1);
20241 continue;
20244 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20245 return SDValue();
20247 // What vector are we extracting the subvector from and at what index?
20248 SDValue ExtVec = Op.getOperand(0);
20249 int ExtIdx = Op.getConstantOperandVal(1);
20251 // We want the EVT of the original extraction to correctly scale the
20252 // extraction index.
20253 EVT ExtVT = ExtVec.getValueType();
20254 ExtVec = peekThroughBitcasts(ExtVec);
20256 // UNDEF nodes convert to UNDEF shuffle mask values.
20257 if (ExtVec.isUndef()) {
20258 Mask.append((unsigned)NumOpElts, -1);
20259 continue;
20262 // Ensure that we are extracting a subvector from a vector the same
20263 // size as the result.
20264 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20265 return SDValue();
20267 // Scale the subvector index to account for any bitcast.
20268 int NumExtElts = ExtVT.getVectorNumElements();
20269 if (0 == (NumExtElts % NumElts))
20270 ExtIdx /= (NumExtElts / NumElts);
20271 else if (0 == (NumElts % NumExtElts))
20272 ExtIdx *= (NumElts / NumExtElts);
20273 else
20274 return SDValue();
20276 // At most we can reference 2 inputs in the final shuffle.
20277 if (SV0.isUndef() || SV0 == ExtVec) {
20278 SV0 = ExtVec;
20279 for (int i = 0; i != NumOpElts; ++i)
20280 Mask.push_back(i + ExtIdx);
20281 } else if (SV1.isUndef() || SV1 == ExtVec) {
20282 SV1 = ExtVec;
20283 for (int i = 0; i != NumOpElts; ++i)
20284 Mask.push_back(i + ExtIdx + NumElts);
20285 } else {
20286 return SDValue();
20290 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20291 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20292 DAG.getBitcast(VT, SV1), Mask, DAG);
20295 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
20296 unsigned CastOpcode = N->getOperand(0).getOpcode();
20297 switch (CastOpcode) {
20298 case ISD::SINT_TO_FP:
20299 case ISD::UINT_TO_FP:
20300 case ISD::FP_TO_SINT:
20301 case ISD::FP_TO_UINT:
20302 // TODO: Allow more opcodes?
20303 // case ISD::BITCAST:
20304 // case ISD::TRUNCATE:
20305 // case ISD::ZERO_EXTEND:
20306 // case ISD::SIGN_EXTEND:
20307 // case ISD::FP_EXTEND:
20308 break;
20309 default:
20310 return SDValue();
20313 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20314 if (!SrcVT.isVector())
20315 return SDValue();
20317 // All operands of the concat must be the same kind of cast from the same
20318 // source type.
20319 SmallVector<SDValue, 4> SrcOps;
20320 for (SDValue Op : N->ops()) {
20321 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20322 Op.getOperand(0).getValueType() != SrcVT)
20323 return SDValue();
20324 SrcOps.push_back(Op.getOperand(0));
20327 // The wider cast must be supported by the target. This is unusual because
20328 // the operation support type parameter depends on the opcode. In addition,
20329 // check the other type in the cast to make sure this is really legal.
20330 EVT VT = N->getValueType(0);
20331 EVT SrcEltVT = SrcVT.getVectorElementType();
20332 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20333 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20334 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20335 switch (CastOpcode) {
20336 case ISD::SINT_TO_FP:
20337 case ISD::UINT_TO_FP:
20338 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20339 !TLI.isTypeLegal(VT))
20340 return SDValue();
20341 break;
20342 case ISD::FP_TO_SINT:
20343 case ISD::FP_TO_UINT:
20344 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20345 !TLI.isTypeLegal(ConcatSrcVT))
20346 return SDValue();
20347 break;
20348 default:
20349 llvm_unreachable("Unexpected cast opcode");
20352 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20353 SDLoc DL(N);
20354 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20355 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20358 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20359 // If we only have one input vector, we don't need to do any concatenation.
20360 if (N->getNumOperands() == 1)
20361 return N->getOperand(0);
20363 // Check if all of the operands are undefs.
20364 EVT VT = N->getValueType(0);
20365 if (ISD::allOperandsUndef(N))
20366 return DAG.getUNDEF(VT);
20368 // Optimize concat_vectors where all but the first of the vectors are undef.
20369 if (all_of(drop_begin(N->ops()),
20370 [](const SDValue &Op) { return Op.isUndef(); })) {
20371 SDValue In = N->getOperand(0);
20372 assert(In.getValueType().isVector() && "Must concat vectors");
20374 // If the input is a concat_vectors, just make a larger concat by padding
20375 // with smaller undefs.
20376 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20377 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20378 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20379 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20380 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20383 SDValue Scalar = peekThroughOneUseBitcasts(In);
20385 // concat_vectors(scalar_to_vector(scalar), undef) ->
20386 // scalar_to_vector(scalar)
20387 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20388 Scalar.hasOneUse()) {
20389 EVT SVT = Scalar.getValueType().getVectorElementType();
20390 if (SVT == Scalar.getOperand(0).getValueType())
20391 Scalar = Scalar.getOperand(0);
20394 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20395 if (!Scalar.getValueType().isVector()) {
20396 // If the bitcast type isn't legal, it might be a trunc of a legal type;
20397 // look through the trunc so we can still do the transform:
20398 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20399 if (Scalar->getOpcode() == ISD::TRUNCATE &&
20400 !TLI.isTypeLegal(Scalar.getValueType()) &&
20401 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20402 Scalar = Scalar->getOperand(0);
20404 EVT SclTy = Scalar.getValueType();
20406 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20407 return SDValue();
20409 // Bail out if the vector size is not a multiple of the scalar size.
20410 if (VT.getSizeInBits() % SclTy.getSizeInBits())
20411 return SDValue();
20413 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20414 if (VNTNumElms < 2)
20415 return SDValue();
20417 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20418 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20419 return SDValue();
20421 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20422 return DAG.getBitcast(VT, Res);
20426 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20427 // We have already tested above for an UNDEF only concatenation.
20428 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20429 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20430 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20431 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20433 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20434 SmallVector<SDValue, 8> Opnds;
20435 EVT SVT = VT.getScalarType();
20437 EVT MinVT = SVT;
20438 if (!SVT.isFloatingPoint()) {
20439 // If the BUILD_VECTORs are built from integers, they may have different
20440 // operand types. Get the smallest type and truncate all operands to it.
20441 bool FoundMinVT = false;
20442 for (const SDValue &Op : N->ops())
20443 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20444 EVT OpSVT = Op.getOperand(0).getValueType();
20445 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20446 FoundMinVT = true;
20448 assert(FoundMinVT && "Concat vector type mismatch");
20451 for (const SDValue &Op : N->ops()) {
20452 EVT OpVT = Op.getValueType();
20453 unsigned NumElts = OpVT.getVectorNumElements();
20455 if (ISD::UNDEF == Op.getOpcode())
20456 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20458 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20459 if (SVT.isFloatingPoint()) {
20460 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20461 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20462 } else {
20463 for (unsigned i = 0; i != NumElts; ++i)
20464 Opnds.push_back(
20465 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20470 assert(VT.getVectorNumElements() == Opnds.size() &&
20471 "Concat vector type mismatch");
20472 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20475 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20476 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20477 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20478 return V;
20480 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20481 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20482 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20483 return V;
20485 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20486 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20487 return V;
20490 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20491 return V;
20493 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20494 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20495 // operands and look for CONCAT operations that place the incoming vectors
20496 // at the exact same location.
20498 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20499 SDValue SingleSource = SDValue();
20500 unsigned PartNumElem =
20501 N->getOperand(0).getValueType().getVectorMinNumElements();
20503 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20504 SDValue Op = N->getOperand(i);
20506 if (Op.isUndef())
20507 continue;
20509 // Check if this is the identity extract:
20510 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20511 return SDValue();
20513 // Find the single incoming vector for the extract_subvector.
20514 if (SingleSource.getNode()) {
20515 if (Op.getOperand(0) != SingleSource)
20516 return SDValue();
20517 } else {
20518 SingleSource = Op.getOperand(0);
20520 // Check the source type is the same as the type of the result.
20521 // If not, this concat may extend the vector, so we cannot
20522 // optimize it away.
20523 if (SingleSource.getValueType() != N->getValueType(0))
20524 return SDValue();
20527 // Check that we are reading from the identity index.
20528 unsigned IdentityIndex = i * PartNumElem;
20529 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20530 return SDValue();
20533 if (SingleSource.getNode())
20534 return SingleSource;
20536 return SDValue();
20539 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20540 // if the subvector can be sourced for free.
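// For example (editor's sketch), with SubVT = v4i32 and Index = 4:
//   (v8i32 insert_subvector ?, X:v4i32, 4)   --> X
//   (v8i32 concat_vectors A:v4i32, B:v4i32)  --> B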
20541 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20542 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20543 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20544 return V.getOperand(1);
20546 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20547 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20548 V.getOperand(0).getValueType() == SubVT &&
20549 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20550 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20551 return V.getOperand(SubIdx);
20553 return SDValue();
20556 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20557 SelectionDAG &DAG,
20558 bool LegalOperations) {
20559 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20560 SDValue BinOp = Extract->getOperand(0);
20561 unsigned BinOpcode = BinOp.getOpcode();
20562 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20563 return SDValue();
20565 EVT VecVT = BinOp.getValueType();
20566 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20567 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20568 return SDValue();
20570 SDValue Index = Extract->getOperand(1);
20571 EVT SubVT = Extract->getValueType(0);
20572 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20573 return SDValue();
20575 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20576 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20578 // TODO: We could handle the case where only 1 operand is being inserted by
20579 // creating an extract of the other operand, but that requires checking
20580 // number of uses and/or costs.
20581 if (!Sub0 || !Sub1)
20582 return SDValue();
20584 // We are inserting both operands of the wide binop only to extract back
20585 // to the narrow vector size. Eliminate all of the insert/extract:
20586 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20587 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20588 BinOp->getFlags());
20591 /// If we are extracting a subvector produced by a wide binary operator, try
20592 /// to use a narrow binary operator and/or avoid concatenation and extraction.
20593 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20594 bool LegalOperations) {
20595 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20596 // some of these bailouts with other transforms.
20598 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20599 return V;
20601 // The extract index must be a constant, so we can map it to a concat operand.
20602 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20603 if (!ExtractIndexC)
20604 return SDValue();
20606 // We are looking for an optionally bitcasted wide vector binary operator
20607 // feeding an extract subvector.
20608 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20609 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20610 unsigned BOpcode = BinOp.getOpcode();
20611 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20612 return SDValue();
20614 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20615 // reduced to the unary fneg when it is visited, and we probably want to deal
20616 // with fneg in a target-specific way.
20617 if (BOpcode == ISD::FSUB) {
20618 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20619 if (C && C->getValueAPF().isNegZero())
20620 return SDValue();
20623 // The binop must be a vector type, so we can extract some fraction of it.
20624 EVT WideBVT = BinOp.getValueType();
20625 // The optimisations below currently assume we are dealing with fixed length
20626 // vectors. It is possible to add support for scalable vectors, but at the
20627 // moment we've done no analysis to prove whether they are profitable or not.
20628 if (!WideBVT.isFixedLengthVector())
20629 return SDValue();
20631 EVT VT = Extract->getValueType(0);
20632 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20633 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20634 "Extract index is not a multiple of the vector length.");
20636 // Bail out if this is not a proper multiple width extraction.
20637 unsigned WideWidth = WideBVT.getSizeInBits();
20638 unsigned NarrowWidth = VT.getSizeInBits();
20639 if (WideWidth % NarrowWidth != 0)
20640 return SDValue();
20642 // Bail out if we are extracting a fraction of a single operation. This can
20643 // occur because we potentially looked through a bitcast of the binop.
20644 unsigned NarrowingRatio = WideWidth / NarrowWidth;
20645 unsigned WideNumElts = WideBVT.getVectorNumElements();
20646 if (WideNumElts % NarrowingRatio != 0)
20647 return SDValue();
20649 // Bail out if the target does not support a narrower version of the binop.
20650 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20651 WideNumElts / NarrowingRatio);
20652 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20653 return SDValue();
20655 // If extraction is cheap, we don't need to look at the binop operands
20656 // for concat ops. The narrow binop alone makes this transform profitable.
20657 // We can't just reuse the original extract index operand because we may have
20658 // bitcasted.
20659 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20660 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20661 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20662 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20663 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20664 SDLoc DL(Extract);
20665 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20666 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20667 BinOp.getOperand(0), NewExtIndex);
20668 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20669 BinOp.getOperand(1), NewExtIndex);
20670 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20671 BinOp.getNode()->getFlags());
20672 return DAG.getBitcast(VT, NarrowBinOp);
20675 // Only handle the case where we are doubling and then halving. A larger ratio
20676 // may require more than two narrow binops to replace the wide binop.
20677 if (NarrowingRatio != 2)
20678 return SDValue();
20680 // TODO: The motivating case for this transform is an x86 AVX1 target. That
20681 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20682 // flavors, but no other 256-bit integer support. This could be extended to
20683 // handle any binop, but that may require fixing/adding other folds to avoid
20684 // codegen regressions.
20685 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20686 return SDValue();
20688 // We need at least one concatenation operation of a binop operand to make
20689 // this transform worthwhile. The concat must double the input vector sizes.
20690 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20691 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20692 return V.getOperand(ConcatOpNum);
20693 return SDValue();
20695 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20696 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20698 if (SubVecL || SubVecR) {
20699 // If a binop operand was not the result of a concat, we must extract a
20700 // half-sized operand for our new narrow binop:
20701 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20702 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20703 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20704 SDLoc DL(Extract);
20705 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20706 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20707 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20708 BinOp.getOperand(0), IndexC);
20710 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20711 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20712 BinOp.getOperand(1), IndexC);
20714 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20715 return DAG.getBitcast(VT, NarrowBinOp);
20718 return SDValue();
20721 /// If we are extracting a subvector from a wide vector load, convert to a
20722 /// narrow load to eliminate the extraction:
20723 /// (extract_subvector (load wide vector)) --> (load narrow vector)
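/// For example (editor's sketch; little-endian, byte-sized types):
///   (v4i32 extract_subvector (v8i32 load %p), 4) --> (v4i32 load %p + 16 bytes)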
20724 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20725 // TODO: Add support for big-endian. The offset calculation must be adjusted.
20726 if (DAG.getDataLayout().isBigEndian())
20727 return SDValue();
20729 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20730 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
20731 return SDValue();
20733 // Allow targets to opt-out.
20734 EVT VT = Extract->getValueType(0);
20736 // We can only create byte sized loads.
20737 if (!VT.isByteSized())
20738 return SDValue();
20740 unsigned Index = Extract->getConstantOperandVal(1);
20741 unsigned NumElts = VT.getVectorMinNumElements();
20743 // The definition of EXTRACT_SUBVECTOR states that the index must be a
20744 // multiple of the minimum number of elements in the result type.
20745 assert(Index % NumElts == 0 && "The extract subvector index is not a "
20746 "multiple of the result's element count");
20748 // It's fine to use TypeSize here as we know the offset will not be negative.
20749 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20751 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20752 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20753 return SDValue();
20755 // The narrow load will be offset from the base address of the old load if
20756 // we are extracting from something besides index 0 (little-endian).
20757 SDLoc DL(Extract);
20759 // TODO: Use "BaseIndexOffset" to make this more effective.
20760 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20762 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20763 MachineFunction &MF = DAG.getMachineFunction();
20764 MachineMemOperand *MMO;
20765 if (Offset.isScalable()) {
20766 MachinePointerInfo MPI =
20767 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20768 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20769 } else
20770 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20771 StoreSize);
20773 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20774 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20775 return NewLd;
20778 /// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
20779 /// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
20780 /// EXTRACT_SUBVECTOR(Op?, ?),
20781 /// Mask'))
20782 /// iff it is legal and profitable to do so. Notably, the trimmed mask
20783 /// (containing only the elements that are extracted)
20784 /// must reference at most two subvectors.
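/// For example (illustrative):
///   (v4i32 (extract_subvector
///              (v8i32 (vector_shuffle A, B, <0,1,8,9,u,u,u,u>)), 0))
///     --> (v4i32 (vector_shuffle (extract_subvector A, 0),
///                                (extract_subvector B, 0), <0,1,4,5>))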
20785 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
20786 SelectionDAG &DAG,
20787 const TargetLowering &TLI,
20788 bool LegalOperations) {
20789 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20790 "Must only be called on EXTRACT_SUBVECTOR's");
20792 SDValue N0 = N->getOperand(0);
20794 // Only deal with non-scalable vectors.
20795 EVT NarrowVT = N->getValueType(0);
20796 EVT WideVT = N0.getValueType();
20797 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
20798 return SDValue();
20800 // The operand must be a shufflevector.
20801 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
20802 if (!WideShuffleVector)
20803 return SDValue();
20805 // The old shuffle needs to go away.
20806 if (!WideShuffleVector->hasOneUse())
20807 return SDValue();
20809 // And the narrow shufflevector that we'll form must be legal.
20810 if (LegalOperations &&
20811 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
20812 return SDValue();
20814 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
20815 int NumEltsExtracted = NarrowVT.getVectorNumElements();
20816 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
20817 "Extract index is not a multiple of the output vector length.");
20819 int WideNumElts = WideVT.getVectorNumElements();
20821 SmallVector<int, 16> NewMask;
20822 NewMask.reserve(NumEltsExtracted);
20823 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
20824 DemandedSubvectors;
20826 // Try to decode the wide mask into narrow mask from at most two subvectors.
20827 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
20828 NumEltsExtracted)) {
20829 assert((M >= -1) && (M < (2 * WideNumElts)) &&
20830 "Out-of-bounds shuffle mask?");
20832 if (M < 0) {
20833 // Does not depend on operands, does not require adjustment.
20834 NewMask.emplace_back(M);
20835 continue;
20838 // From which operand of the shuffle does this shuffle mask element pick?
20839 int WideShufOpIdx = M / WideNumElts;
20840 // Which element of that operand is picked?
20841 int OpEltIdx = M % WideNumElts;
20843 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
20844 "Shuffle mask vector decomposition failure.");
20846 // And which NumEltsExtracted-sized subvector of that operand is that?
20847 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
20848 // And which element within that subvector of that operand is that?
20849 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
20851 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
20852 "Shuffle mask subvector decomposition failure.");
20854 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
20855 WideShufOpIdx * WideNumElts) == M &&
20856 "Shuffle mask full decomposition failure.");
20858 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
20860 if (Op.isUndef()) {
20861 // Picking from an undef operand. Let's adjust mask instead.
20862 NewMask.emplace_back(-1);
20863 continue;
20866 // Profitability check: only deal with extractions from the first subvector.
20867 if (OpSubvecIdx != 0)
20868 return SDValue();
20870 const std::pair<SDValue, int> DemandedSubvector =
20871 std::make_pair(Op, OpSubvecIdx);
20873 if (DemandedSubvectors.insert(DemandedSubvector)) {
20874 if (DemandedSubvectors.size() > 2)
20875 return SDValue(); // We can't handle more than two subvectors.
20876 // How many elements into the WideVT does this subvector start?
20877 int Index = NumEltsExtracted * OpSubvecIdx;
20878 // Bail out if the extraction isn't going to be cheap.
20879 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
20880 return SDValue();
20883 // Ok, but from which operand of the new shuffle will this element pick?
20884 int NewOpIdx =
20885 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
20886 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
20888 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
20889 NewMask.emplace_back(AdjM);
20891 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
20892 assert(DemandedSubvectors.size() <= 2 &&
20893 "Should have ended up demanding at most two subvectors.");
20895 // Did we discover that the shuffle does not actually depend on operands?
20896 if (DemandedSubvectors.empty())
20897 return DAG.getUNDEF(NarrowVT);
20899 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
20900 // operand[s]/index[es], so there is no point in checking for its legality.
20902 // Do not turn a legal shuffle into an illegal one.
20903 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
20904 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
20905 return SDValue();
20907 SDLoc DL(N);
20909 SmallVector<SDValue, 2> NewOps;
20910 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
20911 &DemandedSubvector : DemandedSubvectors) {
20912 // How many elements into the WideVT does this subvector start?
20913 int Index = NumEltsExtracted * DemandedSubvector.second;
20914 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
20915 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
20916 DemandedSubvector.first, IndexC));
20918 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
20919 "Should end up with either one or two ops");
20921 // If we ended up with only one operand, pad with an undef.
20922 if (NewOps.size() == 1)
20923 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
20925 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
20928 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20929 EVT NVT = N->getValueType(0);
20930 SDValue V = N->getOperand(0);
20931 uint64_t ExtIdx = N->getConstantOperandVal(1);
20933 // Extract from UNDEF is UNDEF.
20934 if (V.isUndef())
20935 return DAG.getUNDEF(NVT);
20937 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20938 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20939 return NarrowLoad;
20941 // Combine an extract of an extract into a single extract_subvector.
20942 // ext (ext X, C), 0 --> ext X, C
20943 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20944 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20945 V.getConstantOperandVal(1)) &&
20946 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20947 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20948 V.getOperand(1));
20952 // Try to move vector bitcast after extract_subv by scaling extraction index:
20953 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
20954 if (V.getOpcode() == ISD::BITCAST &&
20955 V.getOperand(0).getValueType().isVector() &&
20956 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
20957 SDValue SrcOp = V.getOperand(0);
20958 EVT SrcVT = SrcOp.getValueType();
20959 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20960 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20961 if ((SrcNumElts % DestNumElts) == 0) {
20962 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20963 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20964 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20965 NewExtEC);
20966 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20967 SDLoc DL(N);
20968 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20969 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20970 V.getOperand(0), NewIndex);
20971 return DAG.getBitcast(NVT, NewExtract);
20974 if ((DestNumElts % SrcNumElts) == 0) {
20975 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20976 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20977 ElementCount NewExtEC =
20978 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20979 EVT ScalarVT = SrcVT.getScalarType();
20980 if ((ExtIdx % DestSrcRatio) == 0) {
20981 SDLoc DL(N);
20982 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20983 EVT NewExtVT =
20984 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20985 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20986 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20987 SDValue NewExtract =
20988 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20989 V.getOperand(0), NewIndex);
20990 return DAG.getBitcast(NVT, NewExtract);
20992 if (NewExtEC.isScalar() &&
20993 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20994 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20995 SDValue NewExtract =
20996 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20997 V.getOperand(0), NewIndex);
20998 return DAG.getBitcast(NVT, NewExtract);
21005 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
21006 unsigned ExtNumElts = NVT.getVectorMinNumElements();
21007 EVT ConcatSrcVT = V.getOperand(0).getValueType();
21008 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
21009 "Concat and extract subvector do not change element type");
21010 assert((ExtIdx % ExtNumElts) == 0 &&
21011 "Extract index is not a multiple of the input vector length.");
21013 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
21014 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
21016 // If the concatenated source types match this extract, it's a direct
21017 // simplification:
21018 // extract_subvec (concat V1, V2, ...), i --> Vi
21019 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
21020 return V.getOperand(ConcatOpIdx);
21022 // If the concatenated source vectors are a multiple length of this extract,
21023 // then extract a fraction of one of those source vectors directly from a
21024 // concat operand. Example:
21025 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
21026 // v2i8 extract_subvec v8i8 Y, 6
21027 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
21028 ConcatSrcNumElts % ExtNumElts == 0) {
21029 SDLoc DL(N);
21030 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
21031 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
21032 "Trying to extract from >1 concat operand?");
21033 assert(NewExtIdx % ExtNumElts == 0 &&
21034 "Extract index is not a multiple of the input vector length.");
21035 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
21036 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
21037 V.getOperand(ConcatOpIdx), NewIndexC);
21041 if (SDValue V =
21042 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
21043 return V;
21045 V = peekThroughBitcasts(V);
21047 // If the input is a build vector, try to make a smaller build vector.
21048 if (V.getOpcode() == ISD::BUILD_VECTOR) {
21049 EVT InVT = V.getValueType();
21050 unsigned ExtractSize = NVT.getSizeInBits();
21051 unsigned EltSize = InVT.getScalarSizeInBits();
21052 // Only do this if we won't split any elements.
21053 if (ExtractSize % EltSize == 0) {
21054 unsigned NumElems = ExtractSize / EltSize;
21055 EVT EltVT = InVT.getVectorElementType();
21056 EVT ExtractVT =
21057 NumElems == 1 ? EltVT
21058 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
21059 if ((Level < AfterLegalizeDAG ||
21060 (NumElems == 1 ||
21061 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
21062 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
21063 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
21065 if (NumElems == 1) {
21066 SDValue Src = V->getOperand(IdxVal);
21067 if (EltVT != Src.getValueType())
21068 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
21069 return DAG.getBitcast(NVT, Src);
21072 // Extract the pieces from the original build_vector.
21073 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
21074 V->ops().slice(IdxVal, NumElems));
21075 return DAG.getBitcast(NVT, BuildVec);
21080 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
21081 // Handle only the simple case where the vector being inserted and the
21082 // vector being extracted are the same size.
21083 EVT SmallVT = V.getOperand(1).getValueType();
21084 if (!NVT.bitsEq(SmallVT))
21085 return SDValue();
21087 // Combine:
21088 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
21089 // Into:
21090 // indices are equal or bit offsets are equal => V2
21091 // otherwise => (extract_subvec V1, ExtIdx)
21092 uint64_t InsIdx = V.getConstantOperandVal(2);
21093 if (InsIdx * SmallVT.getScalarSizeInBits() ==
21094 ExtIdx * NVT.getScalarSizeInBits()) {
21095 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
21096 return SDValue();
21098 return DAG.getBitcast(NVT, V.getOperand(1));
21100 return DAG.getNode(
21101 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
21102 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
21103 N->getOperand(1));
21106 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
21107 return NarrowBOp;
21109 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21110 return SDValue(N, 0);
21112 return SDValue();
21115 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
21116 /// followed by concatenation. Narrow vector ops may have better performance
21117 /// than wide ops, and this can unlock further narrowing of other vector ops.
21118 /// Targets can invert this transform later if it is not profitable.
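/// For example, with v4i32 X and Y (illustrative):
///   shuffle (v8i32 concat X, undef), (concat Y, undef), <0,8,1,9,u,u,u,u>
///     --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <u,u,u,u>)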
21119 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
21120 SelectionDAG &DAG) {
21121 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
21122 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
21123 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
21124 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
21125 return SDValue();
21127 // Split the wide shuffle mask into halves. Any mask element that is accessing
21128 // operand 1 is offset down to account for narrowing of the vectors.
21129 ArrayRef<int> Mask = Shuf->getMask();
21130 EVT VT = Shuf->getValueType(0);
21131 unsigned NumElts = VT.getVectorNumElements();
21132 unsigned HalfNumElts = NumElts / 2;
21133 SmallVector<int, 16> Mask0(HalfNumElts, -1);
21134 SmallVector<int, 16> Mask1(HalfNumElts, -1);
21135 for (unsigned i = 0; i != NumElts; ++i) {
21136 if (Mask[i] == -1)
21137 continue;
21138 // If we reference the upper (undef) subvector then the element is undef.
21139 if ((Mask[i] % NumElts) >= HalfNumElts)
21140 continue;
21141 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
21142 if (i < HalfNumElts)
21143 Mask0[i] = M;
21144 else
21145 Mask1[i - HalfNumElts] = M;
21148 // Ask the target if this is a valid transform.
21149 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21150 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
21151 HalfNumElts);
21152 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
21153 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
21154 return SDValue();
21156 // shuffle (concat X, undef), (concat Y, undef), Mask -->
21157 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
21158 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
21159 SDLoc DL(Shuf);
21160 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
21161 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
21162 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
21165 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
21166 // or turn a shuffle of a single concat into a simpler shuffle, then concat.
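// For example, with v4i32 A, B, C, D (illustrative):
//   shuffle (v8i32 concat A, B), (v8i32 concat C, D), <4,5,6,7,8,9,10,11>
//     --> concat B, C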
21167 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
21168 EVT VT = N->getValueType(0);
21169 unsigned NumElts = VT.getVectorNumElements();
21171 SDValue N0 = N->getOperand(0);
21172 SDValue N1 = N->getOperand(1);
21173 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21174 ArrayRef<int> Mask = SVN->getMask();
21176 SmallVector<SDValue, 4> Ops;
21177 EVT ConcatVT = N0.getOperand(0).getValueType();
21178 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
21179 unsigned NumConcats = NumElts / NumElemsPerConcat;
21181 auto IsUndefMaskElt = [](int i) { return i == -1; };
21183 // Special case: shuffle(concat(A,B)) can be more efficiently represented
21184 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
21185 // half vector elements.
21186 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
21187 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
21188 IsUndefMaskElt)) {
21189 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
21190 N0.getOperand(1),
21191 Mask.slice(0, NumElemsPerConcat));
21192 N1 = DAG.getUNDEF(ConcatVT);
21193 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
21196 // Look at every vector that's inserted. We're looking for exact
21197 // subvector-sized copies from a concatenated vector
21198 for (unsigned I = 0; I != NumConcats; ++I) {
21199 unsigned Begin = I * NumElemsPerConcat;
21200 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
21202 // Make sure we're dealing with a copy.
21203 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
21204 Ops.push_back(DAG.getUNDEF(ConcatVT));
21205 continue;
21208 int OpIdx = -1;
21209 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
21210 if (IsUndefMaskElt(SubMask[i]))
21211 continue;
21212 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
21213 return SDValue();
21214 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
21215 if (0 <= OpIdx && EltOpIdx != OpIdx)
21216 return SDValue();
21217 OpIdx = EltOpIdx;
21219 assert(0 <= OpIdx && "Unknown concat_vectors op");
21221 if (OpIdx < (int)N0.getNumOperands())
21222 Ops.push_back(N0.getOperand(OpIdx));
21223 else
21224 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
21227 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21230 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21231 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21233 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
21234 // a simplification in some sense, but it isn't appropriate in general: some
21235 // BUILD_VECTORs are substantially cheaper than others. The general case
21236 // of a BUILD_VECTOR requires inserting each element individually (or
21237 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
21238 // all constants is a single constant pool load. A BUILD_VECTOR where each
21239 // element is identical is a splat. A BUILD_VECTOR where most of the operands
21240 // are undef lowers to a small number of element insertions.
21242 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
21243 // We don't fold shuffles where one side is a non-zero constant, and we don't
21244 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
21245 // non-constant operands. This seems to work out reasonably well in practice.
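//
// For example (illustrative, subject to the heuristics above):
//   shuffle (build_vector a, b, c, d), (build_vector e, f, g, h), <0,4,1,5>
//     --> build_vector a, e, b, f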
21246 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
21247 SelectionDAG &DAG,
21248 const TargetLowering &TLI) {
21249 EVT VT = SVN->getValueType(0);
21250 unsigned NumElts = VT.getVectorNumElements();
21251 SDValue N0 = SVN->getOperand(0);
21252 SDValue N1 = SVN->getOperand(1);
21254 if (!N0->hasOneUse())
21255 return SDValue();
21257 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
21258 // discussed above.
21259 if (!N1.isUndef()) {
21260 if (!N1->hasOneUse())
21261 return SDValue();
21263 bool N0AnyConst = isAnyConstantBuildVector(N0);
21264 bool N1AnyConst = isAnyConstantBuildVector(N1);
21265 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
21266 return SDValue();
21267 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
21268 return SDValue();
21271 // If both inputs are splats of the same value then we can safely merge this
21272 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
21273 bool IsSplat = false;
21274 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
21275 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
21276 if (BV0 && BV1)
21277 if (SDValue Splat0 = BV0->getSplatValue())
21278 IsSplat = (Splat0 == BV1->getSplatValue());
21280 SmallVector<SDValue, 8> Ops;
21281 SmallSet<SDValue, 16> DuplicateOps;
21282 for (int M : SVN->getMask()) {
21283 SDValue Op = DAG.getUNDEF(VT.getScalarType());
21284 if (M >= 0) {
21285 int Idx = M < (int)NumElts ? M : M - NumElts;
21286 SDValue &S = (M < (int)NumElts ? N0 : N1);
21287 if (S.getOpcode() == ISD::BUILD_VECTOR) {
21288 Op = S.getOperand(Idx);
21289 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
21290 SDValue Op0 = S.getOperand(0);
21291 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
21292 } else {
21293 // Operand can't be combined - bail out.
21294 return SDValue();
21298 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
21299 // generating a splat; semantically, this is fine, but it's likely to
21300 // generate low-quality code if the target can't reconstruct an appropriate
21301 // shuffle.
21302 if (!Op.isUndef() && !isIntOrFPConstant(Op))
21303 if (!IsSplat && !DuplicateOps.insert(Op).second)
21304 return SDValue();
21306 Ops.push_back(Op);
21309 // BUILD_VECTOR requires all inputs to be of the same type, so find the
21310 // maximum type and extend them all.
21311 EVT SVT = VT.getScalarType();
21312 if (SVT.isInteger())
21313 for (SDValue &Op : Ops)
21314 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
21315 if (SVT != VT.getScalarType())
21316 for (SDValue &Op : Ops)
21317 Op = TLI.isZExtFree(Op.getValueType(), SVT)
21318 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
21319 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
21320 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
21323 // Match shuffles that can be converted to any_vector_extend_in_reg.
21324 // This is often generated during legalization.
21325 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
21326 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
21327 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
21328 SelectionDAG &DAG,
21329 const TargetLowering &TLI,
21330 bool LegalOperations) {
21331 EVT VT = SVN->getValueType(0);
21332 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21334 // TODO Add support for big-endian when we have a test case.
21335 if (!VT.isInteger() || IsBigEndian)
21336 return SDValue();
21338 unsigned NumElts = VT.getVectorNumElements();
21339 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21340 ArrayRef<int> Mask = SVN->getMask();
21341 SDValue N0 = SVN->getOperand(0);
21343 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
21344 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
21345 for (unsigned i = 0; i != NumElts; ++i) {
21346 if (Mask[i] < 0)
21347 continue;
21348 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
21349 continue;
21350 return false;
21352 return true;
21355 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
21356 // power-of-2 extensions as they are the most likely.
21357 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
21358 // Check for non power of 2 vector sizes
21359 if (NumElts % Scale != 0)
21360 continue;
21361 if (!isAnyExtend(Scale))
21362 continue;
21364 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
21365 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
21366 // Never create an illegal type. Only create unsupported operations if we
21367 // are pre-legalization.
21368 if (TLI.isTypeLegal(OutVT))
21369 if (!LegalOperations ||
21370 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
21371 return DAG.getBitcast(VT,
21372 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
21373 SDLoc(SVN), OutVT, N0));
21376 return SDValue();
21379 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
21380 // each source element of a large type into the lowest elements of a smaller
21381 // destination type. This is often generated during legalization.
21382 // If the source node itself was a '*_extend_vector_inreg' node then we should
21383 // be able to remove it.
21384 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
21385 SelectionDAG &DAG) {
21386 EVT VT = SVN->getValueType(0);
21387 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21389 // TODO Add support for big-endian when we have a test case.
21390 if (!VT.isInteger() || IsBigEndian)
21391 return SDValue();
21393 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
21395 unsigned Opcode = N0.getOpcode();
21396 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
21397 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
21398 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
21399 return SDValue();
21401 SDValue N00 = N0.getOperand(0);
21402 ArrayRef<int> Mask = SVN->getMask();
21403 unsigned NumElts = VT.getVectorNumElements();
21404 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21405 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
21406 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
21408 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
21409 return SDValue();
21410 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
21412 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
21413 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
21414 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
21415 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
21416 for (unsigned i = 0; i != NumElts; ++i) {
21417 if (Mask[i] < 0)
21418 continue;
21419 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
21420 continue;
21421 return false;
21423 return true;
21426 // At the moment we just handle the case where we've truncated back to the
21427 // same size as before the extension.
21428 // TODO: handle more extension/truncation cases as cases arise.
21429 if (EltSizeInBits != ExtSrcSizeInBits)
21430 return SDValue();
21432 // We can remove *extend_vector_inreg only if the truncation happens at
21433 // the same scale as the extension.
21434 if (isTruncate(ExtScale))
21435 return DAG.getBitcast(VT, N00);
21437 return SDValue();
21440 // Combine shuffles of splat-shuffles of the form:
21441 // shuffle (shuffle V, undef, splat-mask), undef, M
21442 // If splat-mask contains undef elements, we need to be careful about
21443 // introducing undef's in the folded mask which are not the result of composing
21444 // the masks of the shuffles.
21445 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
21446 SelectionDAG &DAG) {
21447 if (!Shuf->getOperand(1).isUndef())
21448 return SDValue();
21449 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21450 if (!Splat || !Splat->isSplat())
21451 return SDValue();
21453 ArrayRef<int> ShufMask = Shuf->getMask();
21454 ArrayRef<int> SplatMask = Splat->getMask();
21455 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21457 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21458 // every undef mask element in the splat-shuffle has a corresponding undef
21459 // element in the user-shuffle's mask or if the composition of mask elements
21460 // would result in undef.
21461 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21462 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21463 // In this case it is not legal to simplify to the splat-shuffle because we
21464 // may expose an undef element at index 1 to the users of the shuffle,
21465 // which was not there before the combine.
21466 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21467 // In this case the composition of masks yields SplatMask, so it's ok to
21468 // simplify to the splat-shuffle.
21469 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21470 // In this case the composed mask includes all undef elements of SplatMask
21471 // and in addition sets element zero to undef. It is safe to simplify to
21472 // the splat-shuffle.
21473 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21474 ArrayRef<int> SplatMask) {
21475 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21476 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21477 SplatMask[UserMask[i]] != -1)
21478 return false;
21479 return true;
21481 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21482 return Shuf->getOperand(0);
21484 // Create a new shuffle with a mask that is composed of the two shuffles'
21485 // masks.
21486 SmallVector<int, 32> NewMask;
21487 for (int Idx : ShufMask)
21488 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21490 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21491 Splat->getOperand(0), Splat->getOperand(1),
21492 NewMask);
21495 /// Combine shuffle of shuffle of the form:
21496 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
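/// For example (illustrative):
///   shuf (shuf X, undef, <2,u,1,2>), undef, <0,u,3,3>
///     --> shuf X, undef, <2,u,2,2>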
21497 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21498 SelectionDAG &DAG) {
21499 if (!OuterShuf->getOperand(1).isUndef())
21500 return SDValue();
21501 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21502 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21503 return SDValue();
21505 ArrayRef<int> OuterMask = OuterShuf->getMask();
21506 ArrayRef<int> InnerMask = InnerShuf->getMask();
21507 unsigned NumElts = OuterMask.size();
21508 assert(NumElts == InnerMask.size() && "Mask length mismatch");
21509 SmallVector<int, 32> CombinedMask(NumElts, -1);
21510 int SplatIndex = -1;
21511 for (unsigned i = 0; i != NumElts; ++i) {
21512 // Undef lanes remain undef.
21513 int OuterMaskElt = OuterMask[i];
21514 if (OuterMaskElt == -1)
21515 continue;
21517 // Peek through the shuffle masks to get the underlying source element.
21518 int InnerMaskElt = InnerMask[OuterMaskElt];
21519 if (InnerMaskElt == -1)
21520 continue;
21522 // Initialize the splatted element.
21523 if (SplatIndex == -1)
21524 SplatIndex = InnerMaskElt;
21526 // Non-matching index - this is not a splat.
21527 if (SplatIndex != InnerMaskElt)
21528 return SDValue();
21530 CombinedMask[i] = InnerMaskElt;
21532 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21533 getSplatIndex(CombinedMask) != -1) &&
21534 "Expected a splat mask");
21536 // TODO: The transform may be a win even if the mask is not legal.
21537 EVT VT = OuterShuf->getValueType(0);
21538 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21539 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21540 return SDValue();
21542 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21543 InnerShuf->getOperand(1), CombinedMask);
21546 /// If the shuffle mask is taking exactly one element from the first vector
21547 /// operand and passing through all other elements from the second vector
21548 /// operand, return the index of the mask element that is choosing an element
21549 /// from the first operand. Otherwise, return -1.
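/// For example, Mask = <4,5,1,7> takes element 1 from operand 0 into lane 2
/// and keeps lanes 0, 1, and 3 from operand 1, so this returns 2.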
21550 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21551 int MaskSize = Mask.size();
21552 int EltFromOp0 = -1;
21553 // TODO: This does not match if there are undef elements in the shuffle mask.
21554 // Should we ignore undefs in the shuffle mask instead? The trade-off is
21555 // removing an instruction (a shuffle), but losing the knowledge that some
21556 // vector lanes are not needed.
21557 for (int i = 0; i != MaskSize; ++i) {
21558 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21559 // We're looking for a shuffle of exactly one element from operand 0.
21560 if (EltFromOp0 != -1)
21561 return -1;
21562 EltFromOp0 = i;
21563 } else if (Mask[i] != i + MaskSize) {
21564 // Nothing from operand 1 can change lanes.
21565 return -1;
21568 return EltFromOp0;
21571 /// If a shuffle inserts exactly one element from a source vector operand into
21572 /// another vector operand and we can access the specified element as a scalar,
21573 /// then we can eliminate the shuffle.
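/// For example (illustrative):
///   shuffle (insert_vector_elt V1, x, 0), V2, <4,5,0,7>
///     --> insert_vector_elt V2, x, 2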
21574 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21575 SelectionDAG &DAG) {
21576 // First, check if we are taking one element of a vector and shuffling that
21577 // element into another vector.
21578 ArrayRef<int> Mask = Shuf->getMask();
21579 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21580 SDValue Op0 = Shuf->getOperand(0);
21581 SDValue Op1 = Shuf->getOperand(1);
21582 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21583 if (ShufOp0Index == -1) {
21584 // Commute mask and check again.
21585 ShuffleVectorSDNode::commuteMask(CommutedMask);
21586 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21587 if (ShufOp0Index == -1)
21588 return SDValue();
21589 // Commute operands to match the commuted shuffle mask.
21590 std::swap(Op0, Op1);
21591 Mask = CommutedMask;
21594 // The shuffle inserts exactly one element from operand 0 into operand 1.
21595 // Now see if we can access that element as a scalar via a real insert element
21596 // instruction.
21597 // TODO: We can try harder to locate the element as a scalar. Examples: it
21598 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21599 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21600 "Shuffle mask value must be from operand 0");
21601 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21602 return SDValue();
21604 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21605 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21606 return SDValue();
21608 // There's an existing insertelement with constant insertion index, so we
21609 // don't need to check the legality/profitability of a replacement operation
21610 // that differs at most in the constant value. The target should be able to
21611 // lower any of those in a similar way. If not, legalization will expand this
21612 // to a scalar-to-vector plus shuffle.
21614 // Note that the shuffle may move the scalar from the position that the insert
21615 // element used. Therefore, our new insert element occurs at the shuffle's
21616 // mask index value, not the insert's index value.
21617 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21618 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21619 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21620 Op1, Op0.getOperand(1), NewInsIndex);
21623 /// If we have a unary shuffle of a shuffle, see if it can be folded away
21624 /// completely. This has the potential to lose undef knowledge because the first
21625 /// shuffle may not have an undef mask element where the second one does. So
21626 /// only call this after doing simplifications based on demanded elements.
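/// For example (illustrative):
///   shuf (shuf0 X, Y, <1,1,3,3>), undef, <1,0,3,2> --> shuf0 X, Y, <1,1,3,3>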
21627 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21628 // shuf (shuf0 X, Y, Mask0), undef, Mask
21629 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21630 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21631 return SDValue();
21633 ArrayRef<int> Mask = Shuf->getMask();
21634 ArrayRef<int> Mask0 = Shuf0->getMask();
21635 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21636 // Ignore undef elements.
21637 if (Mask[i] == -1)
21638 continue;
21639 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21641 // Is the element of the shuffle operand chosen by this shuffle the same as
21642 // the element chosen by the shuffle operand itself?
21643 if (Mask0[Mask[i]] != Mask0[i])
21644 return SDValue();
21646 // Every element of this shuffle is identical to the result of the previous
21647 // shuffle, so we can replace this value.
21648 return Shuf->getOperand(0);
21651 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21652 EVT VT = N->getValueType(0);
21653 unsigned NumElts = VT.getVectorNumElements();
21655 SDValue N0 = N->getOperand(0);
21656 SDValue N1 = N->getOperand(1);
21658 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21660 // Canonicalize shuffle undef, undef -> undef
21661 if (N0.isUndef() && N1.isUndef())
21662 return DAG.getUNDEF(VT);
21664 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21666 // Canonicalize shuffle v, v -> v, undef
21667 if (N0 == N1)
21668 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
21669 createUnaryMask(SVN->getMask(), NumElts));
21671 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
21672 if (N0.isUndef())
21673 return DAG.getCommutedVectorShuffle(*SVN);
21675 // Remove references to rhs if it is undef
21676 if (N1.isUndef()) {
21677 bool Changed = false;
21678 SmallVector<int, 8> NewMask;
21679 for (unsigned i = 0; i != NumElts; ++i) {
21680 int Idx = SVN->getMaskElt(i);
21681 if (Idx >= (int)NumElts) {
21682 Idx = -1;
21683 Changed = true;
21685 NewMask.push_back(Idx);
21687 if (Changed)
21688 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21691 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21692 return InsElt;
21694 // A shuffle of a single vector that is a splatted value can always be folded.
21695 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21696 return V;
21698 if (SDValue V = formSplatFromShuffles(SVN, DAG))
21699 return V;
21701 // If it is a splat, check if the argument vector is another splat or a
21702 // build_vector.
21703 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21704 int SplatIndex = SVN->getSplatIndex();
21705 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21706 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21707 // splat (vector_bo L, R), Index -->
21708 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21709 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21710 SDLoc DL(N);
21711 EVT EltVT = VT.getScalarType();
21712 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21713 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21714 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21715 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21716 N0.getNode()->getFlags());
21717 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21718 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21719 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21722 // If this is a bit convert that changes the element type of the vector but
21723 // not the number of vector elements, look through it. Be careful not to
21724 // look through conversions that change things like v4f32 to v2f64.
21725 SDNode *V = N0.getNode();
21726 if (V->getOpcode() == ISD::BITCAST) {
21727 SDValue ConvInput = V->getOperand(0);
21728 if (ConvInput.getValueType().isVector() &&
21729 ConvInput.getValueType().getVectorNumElements() == NumElts)
21730 V = ConvInput.getNode();
21733 if (V->getOpcode() == ISD::BUILD_VECTOR) {
21734 assert(V->getNumOperands() == NumElts &&
21735 "BUILD_VECTOR has wrong number of operands");
21736 SDValue Base;
21737 bool AllSame = true;
21738 for (unsigned i = 0; i != NumElts; ++i) {
21739 if (!V->getOperand(i).isUndef()) {
21740 Base = V->getOperand(i);
21741 break;
21744 // Splat of <u, u, u, u>, return <u, u, u, u>
21745 if (!Base.getNode())
21746 return N0;
21747 for (unsigned i = 0; i != NumElts; ++i) {
21748 if (V->getOperand(i) != Base) {
21749 AllSame = false;
21750 break;
21753 // Splat of <x, x, x, x>, return <x, x, x, x>
21754 if (AllSame)
21755 return N0;
21757 // Canonicalize any other splat as a build_vector.
21758 SDValue Splatted = V->getOperand(SplatIndex);
21759 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21760 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21762 // We may have jumped through bitcasts, so the type of the
21763 // BUILD_VECTOR may not match the type of the shuffle.
21764 if (V->getValueType(0) != VT)
21765 NewBV = DAG.getBitcast(VT, NewBV);
21766 return NewBV;
21770 // Simplify source operands based on shuffle mask.
21771 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21772 return SDValue(N, 0);
21774 // This is intentionally placed after demanded elements simplification because
21775 // it could eliminate knowledge of undef elements created by this shuffle.
21776 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21777 return ShufOp;
21779 // Match shuffles that can be converted to any_vector_extend_in_reg.
21780 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21781 return V;
21783 // Combine "truncate_vector_in_reg" style shuffles.
21784 if (SDValue V = combineTruncationShuffle(SVN, DAG))
21785 return V;
21787 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21788 Level < AfterLegalizeVectorOps &&
21789 (N1.isUndef() ||
21790 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21791 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21792 if (SDValue V = partitionShuffleOfConcats(N, DAG))
21793 return V;
21796 // A shuffle of a concat of the same narrow vector can be reduced to use
21797 // only low-half elements of a concat with undef:
21798 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
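// e.g. with v2i32 X (illustrative):
//   shuf (v4i32 concat X, X), undef, <3,0,u,2>
//     --> shuf (v4i32 concat X, undef), undef, <1,0,u,0>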
21799 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21800 N0.getNumOperands() == 2 &&
21801 N0.getOperand(0) == N0.getOperand(1)) {
21802 int HalfNumElts = (int)NumElts / 2;
21803 SmallVector<int, 8> NewMask;
21804 for (unsigned i = 0; i != NumElts; ++i) {
21805 int Idx = SVN->getMaskElt(i);
21806 if (Idx >= HalfNumElts) {
21807 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21808 Idx -= HalfNumElts;
21810 NewMask.push_back(Idx);
21812 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21813 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21814 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21815 N0.getOperand(0), UndefVec);
21816 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21820 // See if we can replace a shuffle with an insert_subvector.
21821 // e.g. v2i32 into v8i32:
21822 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21823 // --> insert_subvector(lhs,rhs1,4).
21824 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21825 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
21826 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
21827 // Ensure RHS subvectors are legal.
21828 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21829 EVT SubVT = RHS.getOperand(0).getValueType();
21830 int NumSubVecs = RHS.getNumOperands();
21831 int NumSubElts = SubVT.getVectorNumElements();
21832 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21833 if (!TLI.isTypeLegal(SubVT))
21834 return SDValue();
21836 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
21837 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21838 return SDValue();
21840 // Search [NumSubElts] spans for RHS sequence.
21841 // TODO: Can we avoid nested loops to increase performance?
21842 SmallVector<int> InsertionMask(NumElts);
21843 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21844 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21845 // Reset mask to identity.
21846 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21848 // Add subvector insertion.
21849 std::iota(InsertionMask.begin() + SubIdx,
21850 InsertionMask.begin() + SubIdx + NumSubElts,
21851 NumElts + (SubVec * NumSubElts));
21853 // See if the shuffle mask matches the reference insertion mask.
21854 bool MatchingShuffle = true;
21855 for (int i = 0; i != (int)NumElts; ++i) {
21856 int ExpectIdx = InsertionMask[i];
21857 int ActualIdx = Mask[i];
21858 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21859 MatchingShuffle = false;
21860 break;
21864 if (MatchingShuffle)
21865 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
21866 RHS.getOperand(SubVec),
21867 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
21870 return SDValue();
21872 ArrayRef<int> Mask = SVN->getMask();
21873 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
21874 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
21875 return InsertN1;
21876 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
21877 SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
21878 ShuffleVectorSDNode::commuteMask(CommuteMask);
21879 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
21880 return InsertN0;
21884 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21885 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21886 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21887 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21888 return Res;
21890 // If this shuffle only has a single input that is a bitcasted shuffle,
21891 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21892 // back to their original types.
21893 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21894 N1.isUndef() && Level < AfterLegalizeVectorOps &&
21895 TLI.isTypeLegal(VT)) {
21897 SDValue BC0 = peekThroughOneUseBitcasts(N0);
21898 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21899 EVT SVT = VT.getScalarType();
21900 EVT InnerVT = BC0->getValueType(0);
21901 EVT InnerSVT = InnerVT.getScalarType();
21903 // Determine which shuffle works with the smaller scalar type.
21904 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21905 EVT ScaleSVT = ScaleVT.getScalarType();
21907 if (TLI.isTypeLegal(ScaleVT) &&
21908 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21909 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21910 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21911 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21913 // Scale the shuffle masks to the smaller scalar type.
21914 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21915 SmallVector<int, 8> InnerMask;
21916 SmallVector<int, 8> OuterMask;
21917 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21918 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21920 // Merge the shuffle masks.
21921 SmallVector<int, 8> NewMask;
21922 for (int M : OuterMask)
21923 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21925 // Test for shuffle mask legality over both commutations.
21926 SDValue SV0 = BC0->getOperand(0);
21927 SDValue SV1 = BC0->getOperand(1);
21928 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21929 if (!LegalMask) {
21930 std::swap(SV0, SV1);
21931 ShuffleVectorSDNode::commuteMask(NewMask);
21932 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21935 if (LegalMask) {
21936 SV0 = DAG.getBitcast(ScaleVT, SV0);
21937 SV1 = DAG.getBitcast(ScaleVT, SV1);
21938 return DAG.getBitcast(
21939 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21945 // Compute the combined shuffle mask for a shuffle with SV0 as the first
21946 // operand, and SV1 as the second operand.
21947 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21948 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
21949 auto MergeInnerShuffle =
21950 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21951 ShuffleVectorSDNode *OtherSVN, SDValue N1,
21952 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21953 SmallVectorImpl<int> &Mask) -> bool {
21954 // Don't try to fold splats; they're likely to simplify somehow, or they
21955 // might be free.
21956 if (OtherSVN->isSplat())
21957 return false;
21959 SV0 = SV1 = SDValue();
21960 Mask.clear();
21962 for (unsigned i = 0; i != NumElts; ++i) {
21963 int Idx = SVN->getMaskElt(i);
21964 if (Idx < 0) {
21965 // Propagate Undef.
21966 Mask.push_back(Idx);
21967 continue;
21970 if (Commute)
21971 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21973 SDValue CurrentVec;
21974 if (Idx < (int)NumElts) {
21975 // This shuffle index refers to the inner shuffle N0. Lookup the inner
21976 // shuffle mask to identify which vector is actually referenced.
21977 Idx = OtherSVN->getMaskElt(Idx);
21978 if (Idx < 0) {
21979 // Propagate Undef.
21980 Mask.push_back(Idx);
21981 continue;
21983 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21984 : OtherSVN->getOperand(1);
21985 } else {
21986 // This shuffle index references an element within N1.
21987 CurrentVec = N1;
21990 // Simple case where 'CurrentVec' is UNDEF.
21991 if (CurrentVec.isUndef()) {
21992 Mask.push_back(-1);
21993 continue;
21996 // Canonicalize the shuffle index. We don't know yet if CurrentVec
21997 // will be the first or second operand of the combined shuffle.
21998 Idx = Idx % NumElts;
21999 if (!SV0.getNode() || SV0 == CurrentVec) {
22000 // Ok. CurrentVec is the left hand side.
22001 // Update the mask accordingly.
22002 SV0 = CurrentVec;
22003 Mask.push_back(Idx);
22004 continue;
22006 if (!SV1.getNode() || SV1 == CurrentVec) {
22007 // Ok. CurrentVec is the right hand side.
22008 // Update the mask accordingly.
22009 SV1 = CurrentVec;
22010 Mask.push_back(Idx + NumElts);
22011 continue;
22014 // Last chance - see if the vector is another shuffle and if it
22015 // uses one of the existing candidate shuffle ops.
22016 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
22017 int InnerIdx = CurrentSVN->getMaskElt(Idx);
22018 if (InnerIdx < 0) {
22019 Mask.push_back(-1);
22020 continue;
22022 SDValue InnerVec = (InnerIdx < (int)NumElts)
22023 ? CurrentSVN->getOperand(0)
22024 : CurrentSVN->getOperand(1);
22025 if (InnerVec.isUndef()) {
22026 Mask.push_back(-1);
22027 continue;
22029 InnerIdx %= NumElts;
22030 if (InnerVec == SV0) {
22031 Mask.push_back(InnerIdx);
22032 continue;
22034 if (InnerVec == SV1) {
22035 Mask.push_back(InnerIdx + NumElts);
22036 continue;
22040 // Bail out if we cannot convert the shuffle pair into a single shuffle.
22041 return false;
22044 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22045 return true;
22047 // Avoid introducing shuffles with illegal mask.
22048 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22049 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22050 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22051 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
22052 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
22053 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
22054 if (TLI.isShuffleMaskLegal(Mask, VT))
22055 return true;
22057 std::swap(SV0, SV1);
22058 ShuffleVectorSDNode::commuteMask(Mask);
22059 return TLI.isShuffleMaskLegal(Mask, VT);
22062 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22063 // Canonicalize shuffles according to rules:
22064 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
22065 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
22066 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
22067 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22068 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
22069 // The incoming shuffle must be of the same type as the result of the
22070 // current shuffle.
22071 assert(N1->getOperand(0).getValueType() == VT &&
22072 "Shuffle types don't match");
22074 SDValue SV0 = N1->getOperand(0);
22075 SDValue SV1 = N1->getOperand(1);
22076 bool HasSameOp0 = N0 == SV0;
22077 bool IsSV1Undef = SV1.isUndef();
22078 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
22079 // Commute the operands of this shuffle so merging below will trigger.
22080 return DAG.getCommutedVectorShuffle(*SVN);
22083 // Canonicalize splat shuffles to the RHS to improve merging below.
22084 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
22085 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
22086 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22087 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
22088 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
22089 return DAG.getCommutedVectorShuffle(*SVN);
22092 // Try to fold according to rules:
22093 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22094 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22095 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22096 // Don't try to fold shuffles with illegal type.
22097 // Only fold if this shuffle is the only user of the other shuffle.
22098 // Try matching commuted shuffle(C, shuffle(A,B)) patterns as well.
22099 for (int i = 0; i != 2; ++i) {
22100 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
22101 N->isOnlyUserOf(N->getOperand(i).getNode())) {
22102 // The incoming shuffle must be of the same type as the result of the
22103 // current shuffle.
22104 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
22105 assert(OtherSV->getOperand(0).getValueType() == VT &&
22106 "Shuffle types don't match");
22108 SDValue SV0, SV1;
22109 SmallVector<int, 4> Mask;
22110 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
22111 SV0, SV1, Mask)) {
22112 // Check if all indices in Mask are Undef. If so, propagate Undef.
22113 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22114 return DAG.getUNDEF(VT);
22116 return DAG.getVectorShuffle(VT, SDLoc(N),
22117 SV0 ? SV0 : DAG.getUNDEF(VT),
22118 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
22123 // Merge shuffles through binops if we are able to merge them with at least
22124 // one other shuffle.
22125 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
22126 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
22127 unsigned SrcOpcode = N0.getOpcode();
22128 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
22129 (N1.isUndef() ||
22130 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
22131 // Get binop source ops, or just pass on the undef.
22132 SDValue Op00 = N0.getOperand(0);
22133 SDValue Op01 = N0.getOperand(1);
22134 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
22135 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
22136 // TODO: We might be able to relax the VT check but we don't currently
22137 // have any isBinOp() that has different result/ops VTs so play safe until
22138 // we have test coverage.
22139 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
22140 Op01.getValueType() == VT && Op11.getValueType() == VT &&
22141 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
22142 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
22143 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
22144 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
22145 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
22146 SmallVectorImpl<int> &Mask, bool LeftOp,
22147 bool Commute) {
22148 SDValue InnerN = Commute ? N1 : N0;
22149 SDValue Op0 = LeftOp ? Op00 : Op01;
22150 SDValue Op1 = LeftOp ? Op10 : Op11;
22151 if (Commute)
22152 std::swap(Op0, Op1);
22153 // Only accept the merged shuffle if we don't introduce undef elements,
22154 // or the inner shuffle already contained undef elements.
22155 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
22156 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
22157 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
22158 Mask) &&
22159 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
22160 llvm::none_of(Mask, [](int M) { return M < 0; }));
22163 // Ensure we don't increase the number of shuffles - we must merge a
22164 // shuffle from at least one of the LHS and RHS ops.
22165 bool MergedLeft = false;
22166 SDValue LeftSV0, LeftSV1;
22167 SmallVector<int, 4> LeftMask;
22168 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
22169 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
22170 MergedLeft = true;
22171 } else {
22172 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22173 LeftSV0 = Op00, LeftSV1 = Op10;
22176 bool MergedRight = false;
22177 SDValue RightSV0, RightSV1;
22178 SmallVector<int, 4> RightMask;
22179 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
22180 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
22181 MergedRight = true;
22182 } else {
22183 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22184 RightSV0 = Op01, RightSV1 = Op11;
22187 if (MergedLeft || MergedRight) {
22188 SDLoc DL(N);
22189 SDValue LHS = DAG.getVectorShuffle(
22190 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
22191 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
22192 SDValue RHS = DAG.getVectorShuffle(
22193 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
22194 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
22195 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
22201 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
22202 return V;
22204 return SDValue();
22207 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
22208 SDValue InVal = N->getOperand(0);
22209 EVT VT = N->getValueType(0);
22211 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
22212 // with a VECTOR_SHUFFLE and possible truncate.
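// For example, with a hypothetical v4i32 vector V:
//   scalar_to_vector(extract_vector_elt(V, 2))
// can become shuffle(V, undef, <2,-1,-1,-1>), avoiding the round trip of the
// element through a scalar register.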
22213 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22214 VT.isFixedLengthVector() &&
22215 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
22216 SDValue InVec = InVal->getOperand(0);
22217 SDValue EltNo = InVal->getOperand(1);
22218 auto InVecT = InVec.getValueType();
22219 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
22220 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
22221 int Elt = C0->getZExtValue();
22222 NewMask[0] = Elt;
22223 // If we have an implicit truncate, do the truncate here as long as it's
22224 // legal; if it's not legal, skip this fold.
22225 if (VT.getScalarType() != InVal.getValueType() &&
22226 InVal.getValueType().isScalarInteger() &&
22227 isTypeLegal(VT.getScalarType())) {
22228 SDValue Val =
22229 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
22230 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
22232 if (VT.getScalarType() == InVecT.getScalarType() &&
22233 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
22234 SDValue LegalShuffle =
22235 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
22236 DAG.getUNDEF(InVecT), NewMask, DAG);
22237 if (LegalShuffle) {
22238 // If the initial vector is the correct size this shuffle is a
22239 // valid result.
22240 if (VT == InVecT)
22241 return LegalShuffle;
22242 // If not we must truncate the vector.
22243 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
22244 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
22245 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
22246 InVecT.getVectorElementType(),
22247 VT.getVectorNumElements());
22248 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
22249 LegalShuffle, ZeroIdx);
22256 return SDValue();
22259 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
22260 EVT VT = N->getValueType(0);
22261 SDValue N0 = N->getOperand(0);
22262 SDValue N1 = N->getOperand(1);
22263 SDValue N2 = N->getOperand(2);
22264 uint64_t InsIdx = N->getConstantOperandVal(2);
22266 // If inserting an UNDEF, just return the original vector.
22267 if (N1.isUndef())
22268 return N0;
22270 // If this is an insert of an extracted vector into an undef vector, we can
22271 // just use the input to the extract.
22272 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22273 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
22274 return N1.getOperand(0);
22276 // If we are inserting a bitcast value into an undef, with the same
22277 // number of elements, just use the bitcast input of the extract.
22278 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
22279 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
22280 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
22281 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22282 N1.getOperand(0).getOperand(1) == N2 &&
22283 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
22284 VT.getVectorElementCount() &&
22285 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
22286 VT.getSizeInBits()) {
22287 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
22290 // If both N1 and N2 are bitcast values on which insert_subvector
22291 // would make sense, pull the bitcast through.
22292 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
22293 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
22294 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
22295 SDValue CN0 = N0.getOperand(0);
22296 SDValue CN1 = N1.getOperand(0);
22297 EVT CN0VT = CN0.getValueType();
22298 EVT CN1VT = CN1.getValueType();
22299 if (CN0VT.isVector() && CN1VT.isVector() &&
22300 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
22301 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
22302 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
22303 CN0.getValueType(), CN0, CN1, N2);
22304 return DAG.getBitcast(VT, NewINSERT);
22308 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
22309 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
22310 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
22311 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22312 N0.getOperand(1).getValueType() == N1.getValueType() &&
22313 N0.getOperand(2) == N2)
22314 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
22315 N1, N2);
22317 // Eliminate an intermediate insert into an undef vector:
22318 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
22319 // insert_subvector undef, X, N2
22320 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
22321 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
22322 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
22323 N1.getOperand(1), N2);
22325 // Push subvector bitcasts to the output, adjusting the index as we go.
22326 // insert_subvector(bitcast(v), bitcast(s), c1)
22327 // -> bitcast(insert_subvector(v, s, c2))
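// For example, with hypothetical types V : v4i32 and S : v2i32:
//   insert_subvector(bitcast V to v8i16, bitcast S to v4i16, 4)
// can be rewritten as
//   bitcast (insert_subvector(V, S, 2)) to v8i16
// where index 4 in i16 elements becomes index 2 in i32 elements.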
22328 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
22329 N1.getOpcode() == ISD::BITCAST) {
22330 SDValue N0Src = peekThroughBitcasts(N0);
22331 SDValue N1Src = peekThroughBitcasts(N1);
22332 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
22333 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
22334 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
22335 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
22336 EVT NewVT;
22337 SDLoc DL(N);
22338 SDValue NewIdx;
22339 LLVMContext &Ctx = *DAG.getContext();
22340 ElementCount NumElts = VT.getVectorElementCount();
22341 unsigned EltSizeInBits = VT.getScalarSizeInBits();
22342 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
22343 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
22344 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
22345 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
22346 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
22347 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
22348 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
22349 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
22350 NumElts.divideCoefficientBy(Scale));
22351 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
22354 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
22355 SDValue Res = DAG.getBitcast(NewVT, N0Src);
22356 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
22357 return DAG.getBitcast(VT, Res);
22362 // Canonicalize insert_subvector dag nodes.
22363 // Example:
22364 // (insert_subvector (insert_subvector A, Idx0), Idx1)
22365 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
22366 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
22367 N1.getValueType() == N0.getOperand(1).getValueType()) {
22368 unsigned OtherIdx = N0.getConstantOperandVal(2);
22369 if (InsIdx < OtherIdx) {
22370 // Swap nodes.
22371 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
22372 N0.getOperand(0), N1, N2);
22373 AddToWorklist(NewOp.getNode());
22374 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
22375 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
22379 // If the input vector is a concatenation, and the insert replaces
22380 // one of the pieces, we can optimize into a single concat_vectors.
22381 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
22382 N0.getOperand(0).getValueType() == N1.getValueType() &&
22383 N0.getOperand(0).getValueType().isScalableVector() ==
22384 N1.getValueType().isScalableVector()) {
22385 unsigned Factor = N1.getValueType().getVectorMinNumElements();
22386 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
22387 Ops[InsIdx / Factor] = N1;
22388 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22391 // Simplify source operands based on insertion.
22392 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
22393 return SDValue(N, 0);
22395 return SDValue();
22398 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
22399 SDValue N0 = N->getOperand(0);
22401 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
22402 if (N0->getOpcode() == ISD::FP16_TO_FP)
22403 return N0->getOperand(0);
22405 return SDValue();
22408 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
22409 SDValue N0 = N->getOperand(0);
22411 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
22412 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
22413 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
22414 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
22415 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
22416 N0.getOperand(0));
22420 return SDValue();
22423 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
22424 SDValue N0 = N->getOperand(0);
22425 EVT VT = N0.getValueType();
22426 unsigned Opcode = N->getOpcode();
22428 // VECREDUCE over 1-element vector is just an extract.
22429 if (VT.getVectorElementCount().isScalar()) {
22430 SDLoc dl(N);
22431 SDValue Res =
22432 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
22433 DAG.getVectorIdxConstant(0, dl));
22434 if (Res.getValueType() != N->getValueType(0))
22435 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22436 return Res;
22439 // On a boolean vector an and/or reduction is the same as a umin/umax
22440 // reduction. Convert them if the latter is legal while the former isn't.
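// Rationale: when every element is known to be 0 or all-ones (all sign bits
// set), the umin of the elements is 0 iff any element is 0, matching the AND
// reduction, and the umax is all-ones iff any element is all-ones, matching
// the OR reduction.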
22441 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22442 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
22443 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
22444 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22445 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22446 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
22447 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22450 return SDValue();
22453 SDValue DAGCombiner::visitVPOp(SDNode *N) {
22454 // VP operations in which all vector elements are disabled - either by
22455 // determining that the mask is all false or that the EVL is 0 - can be
22456 // eliminated.
22457 bool AreAllEltsDisabled = false;
22458 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
22459 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
22460 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
22461 AreAllEltsDisabled |=
22462 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
22464 // This is the only generic VP combine we support for now.
22465 if (!AreAllEltsDisabled)
22466 return SDValue();
22468 // Binary operations can be replaced by UNDEF.
22469 if (ISD::isVPBinaryOp(N->getOpcode()))
22470 return DAG.getUNDEF(N->getValueType(0));
22472 // VP Memory operations can be replaced by either the chain (stores) or the
22473 // chain + undef (loads).
22474 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
22475 if (MemSD->writeMem())
22476 return MemSD->getChain();
22477 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
22480 // Reduction operations return the start operand when no elements are active.
22481 if (ISD::isVPReduction(N->getOpcode()))
22482 return N->getOperand(0);
22484 return SDValue();
22487 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
22488 /// with the destination vector and a zero vector.
22489 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22490 /// vector_shuffle V, Zero, <0, 4, 2, 4>
22491 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22492 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22494 EVT VT = N->getValueType(0);
22495 SDValue LHS = N->getOperand(0);
22496 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22497 SDLoc DL(N);
22499 // Make sure we're not running after operation legalization where it
22500 // may have custom lowered the vector shuffles.
22501 if (LegalOperations)
22502 return SDValue();
22504 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22505 return SDValue();
22507 EVT RVT = RHS.getValueType();
22508 unsigned NumElts = RHS.getNumOperands();
22510 // Attempt to create a valid clear mask, splitting the mask into
22511 // sub elements and checking to see if each is
22512 // all zeros or all ones - suitable for shuffle masking.
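// For example, a hypothetical v4i32 AND mask
//   <0xFFFF0000, 0xFFFFFFFF, 0, 0x0000FFFF>
// is not a clear mask at i32 granularity, but at Split == 2 every 16-bit half
// is all zeros or all ones, so it can become a v8i16 shuffle with zero.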
22513 auto BuildClearMask = [&](int Split) {
22514 int NumSubElts = NumElts * Split;
22515 int NumSubBits = RVT.getScalarSizeInBits() / Split;
22517 SmallVector<int, 8> Indices;
22518 for (int i = 0; i != NumSubElts; ++i) {
22519 int EltIdx = i / Split;
22520 int SubIdx = i % Split;
22521 SDValue Elt = RHS.getOperand(EltIdx);
22522 // X & undef --> 0 (not undef). So this lane must be converted to choose
22523 // from the zero constant vector (same as if the element had all 0-bits).
22524 if (Elt.isUndef()) {
22525 Indices.push_back(i + NumSubElts);
22526 continue;
22529 APInt Bits;
22530 if (isa<ConstantSDNode>(Elt))
22531 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22532 else if (isa<ConstantFPSDNode>(Elt))
22533 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22534 else
22535 return SDValue();
22537 // Extract the sub element from the constant bit mask.
22538 if (DAG.getDataLayout().isBigEndian())
22539 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22540 else
22541 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22543 if (Bits.isAllOnes())
22544 Indices.push_back(i);
22545 else if (Bits == 0)
22546 Indices.push_back(i + NumSubElts);
22547 else
22548 return SDValue();
22551 // Let's see if the target supports this vector_shuffle.
22552 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22553 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22554 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22555 return SDValue();
22557 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22558 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22559 DAG.getBitcast(ClearVT, LHS),
22560 Zero, Indices));
22563 // Determine maximum split level (byte level masking).
22564 int MaxSplit = 1;
22565 if (RVT.getScalarSizeInBits() % 8 == 0)
22566 MaxSplit = RVT.getScalarSizeInBits() / 8;
22568 for (int Split = 1; Split <= MaxSplit; ++Split)
22569 if (RVT.getScalarSizeInBits() % Split == 0)
22570 if (SDValue S = BuildClearMask(Split))
22571 return S;
22573 return SDValue();
22576 /// If a vector binop is performed on splat values, it may be profitable to
22577 /// extract, scalarize, and insert/splat.
22578 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
22579 const SDLoc &DL) {
22580 SDValue N0 = N->getOperand(0);
22581 SDValue N1 = N->getOperand(1);
22582 unsigned Opcode = N->getOpcode();
22583 EVT VT = N->getValueType(0);
22584 EVT EltVT = VT.getVectorElementType();
22585 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22587 // TODO: Remove/replace the extract cost check? If the elements are available
22588 // as scalars, then there may be no extract cost. Should we ask if
22589 // inserting a scalar back into a vector is cheap instead?
22590 int Index0, Index1;
22591 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22592 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
22593 if (!Src0 || !Src1 || Index0 != Index1 ||
22594 Src0.getValueType().getVectorElementType() != EltVT ||
22595 Src1.getValueType().getVectorElementType() != EltVT ||
22596 !TLI.isExtractVecEltCheap(VT, Index0) ||
22597 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22598 return SDValue();
22600 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
22601 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
22602 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
22603 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22605 // If all lanes but 1 are undefined, no need to splat the scalar result.
22606 // TODO: Keep track of undefs and use that info in the general case.
22607 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
22608 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
22609 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
22610 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
22611 // build_vec ..undef, (bo X, Y), undef...
22612 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
22613 Ops[Index0] = ScalarBO;
22614 return DAG.getBuildVector(VT, DL, Ops);
22617 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
22618 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
22619 return DAG.getBuildVector(VT, DL, Ops);
22622 /// Visit a binary vector operation, like ADD.
22623 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
22624 EVT VT = N->getValueType(0);
22625 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
22627 SDValue LHS = N->getOperand(0);
22628 SDValue RHS = N->getOperand(1);
22629 unsigned Opcode = N->getOpcode();
22630 SDNodeFlags Flags = N->getFlags();
22632 // Move unary shuffles with identical masks after a vector binop:
22633 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22634 // --> shuffle (VBinOp A, B), Undef, Mask
22635 // This does not require type legality checks because we are creating the
22636 // same types of operations that are in the original sequence. We do have to
22637 // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
22638 // though. This code is adapted from the identical transform in instcombine.
22639 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22640 Opcode != ISD::UREM && Opcode != ISD::SREM &&
22641 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22642 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22643 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
22644 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22645 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22646 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22647 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22648 RHS.getOperand(0), Flags);
22649 SDValue UndefV = LHS.getOperand(1);
22650 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22653 // Try to sink a splat shuffle after a binop with a uniform constant.
22654 // This is limited to cases where neither the shuffle nor the constant have
22655 // undefined elements because that could be poison-unsafe or inhibit
22656 // demanded elements analysis. It is further limited to not change a splat
22657 // of an inserted scalar because that may be optimized better by
22658 // load-folding or other target-specific behaviors.
22659 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22660 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22661 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22662 // binop (splat X), (splat C) --> splat (binop X, C)
22663 SDValue X = Shuf0->getOperand(0);
22664 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22665 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22666 Shuf0->getMask());
22668 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22669 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22670 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22671 // binop (splat C), (splat X) --> splat (binop C, X)
22672 SDValue X = Shuf1->getOperand(0);
22673 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22674 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22675 Shuf1->getMask());
22679 // The following pattern is likely to emerge with vector reduction ops. Moving
22680 // the binary operation ahead of insertion may allow using a narrower vector
22681 // instruction that has better performance than the wide version of the op:
22682 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22683 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22684 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22685 LHS.getOperand(2) == RHS.getOperand(2) &&
22686 (LHS.hasOneUse() || RHS.hasOneUse())) {
22687 SDValue X = LHS.getOperand(1);
22688 SDValue Y = RHS.getOperand(1);
22689 SDValue Z = LHS.getOperand(2);
22690 EVT NarrowVT = X.getValueType();
22691 if (NarrowVT == Y.getValueType() &&
22692 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22693 LegalOperations)) {
22694 // (binop undef, undef) may not return undef, so compute that result.
22695 SDValue VecC =
22696 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22697 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22698 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22702 // Make sure all but the first op are undef or constant.
22703 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22704 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22705 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22706 return Op.isUndef() ||
22707 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22711 // The following pattern is likely to emerge with vector reduction ops. Moving
22712 // the binary operation ahead of the concat may allow using a narrower vector
22713 // instruction that has better performance than the wide version of the op:
22714 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22715 // concat (VBinOp X, Y), VecC
22716 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22717 (LHS.hasOneUse() || RHS.hasOneUse())) {
22718 EVT NarrowVT = LHS.getOperand(0).getValueType();
22719 if (NarrowVT == RHS.getOperand(0).getValueType() &&
22720 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22721 unsigned NumOperands = LHS.getNumOperands();
22722 SmallVector<SDValue, 4> ConcatOps;
22723 for (unsigned i = 0; i != NumOperands; ++i) {
22724 // This constant folds for operands 1 and up.
22725 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22726 RHS.getOperand(i)));
22729 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22733 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
22734 return V;
22736 return SDValue();
22739 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22740 SDValue N2) {
22741 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
22743 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22744 cast<CondCodeSDNode>(N0.getOperand(2))->get());
22746 // If we got a simplified select_cc node back from SimplifySelectCC, then
22747 // break it down into a new SETCC node, and a new SELECT node, and then return
22748 // the SELECT node, since we were called with a SELECT node.
22749 if (SCC.getNode()) {
22750 // Check to see if we got a select_cc back (to turn into setcc/select).
22751 // Otherwise, just return whatever node we got back, like fabs.
22752 if (SCC.getOpcode() == ISD::SELECT_CC) {
22753 const SDNodeFlags Flags = N0.getNode()->getFlags();
22754 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22755 N0.getValueType(),
22756 SCC.getOperand(0), SCC.getOperand(1),
22757 SCC.getOperand(4), Flags);
22758 AddToWorklist(SETCC.getNode());
22759 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22760 SCC.getOperand(2), SCC.getOperand(3));
22761 SelectNode->setFlags(Flags);
22762 return SelectNode;
22765 return SCC;
22767 return SDValue();
22770 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22771 /// being selected between, see if we can simplify the select. Callers of this
22772 /// should assume that TheSelect is deleted if this returns true. As such, they
22773 /// should return the appropriate thing (e.g. the node) back to the top-level of
22774 /// the DAG combiner loop to avoid it being looked at.
22775 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22776 SDValue RHS) {
22777 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22778 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22779 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22780 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22781 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22782 SDValue Sqrt = RHS;
22783 ISD::CondCode CC;
22784 SDValue CmpLHS;
22785 const ConstantFPSDNode *Zero = nullptr;
22787 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22788 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22789 CmpLHS = TheSelect->getOperand(0);
22790 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22791 } else {
22792 // SELECT or VSELECT
22793 SDValue Cmp = TheSelect->getOperand(0);
22794 if (Cmp.getOpcode() == ISD::SETCC) {
22795 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22796 CmpLHS = Cmp.getOperand(0);
22797 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22800 if (Zero && Zero->isZero() &&
22801 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22802 CC == ISD::SETULT || CC == ISD::SETLT)) {
22803 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22804 CombineTo(TheSelect, Sqrt);
22805 return true;
22809 // Cannot simplify select with vector condition
22810 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22812 // If this is a select from two identical things, try to pull the operation
22813 // through the select.
22814 if (LHS.getOpcode() != RHS.getOpcode() ||
22815 !LHS.hasOneUse() || !RHS.hasOneUse())
22816 return false;
22818 // If this is a load and the token chain is identical, replace the select
22819 // of two loads with a load through a select of the address to load from.
22820 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22821 // constants have been dropped into the constant pool.
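// Illustratively:
//   select Cond, (load Chain Ptr0), (load Chain Ptr1)
//     --> load Chain (select Cond, Ptr0, Ptr1)
// subject to the checks below (simple, non-indexed, matching memory types,
// default address space, and no dependence cycles through the condition).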
22822 if (LHS.getOpcode() == ISD::LOAD) {
22823 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22824 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22826 // Token chains must be identical.
22827 if (LHS.getOperand(0) != RHS.getOperand(0) ||
22828 // Do not let this transformation reduce the number of volatile loads.
22829 // Be conservative for atomics for the moment
22830 // TODO: This does appear to be legal for unordered atomics (see D66309)
22831 !LLD->isSimple() || !RLD->isSimple() ||
22832 // FIXME: If either is a pre/post inc/dec load,
22833 // we'd need to split out the address adjustment.
22834 LLD->isIndexed() || RLD->isIndexed() ||
22835 // If this is an EXTLOAD, the VT's must match.
22836 LLD->getMemoryVT() != RLD->getMemoryVT() ||
22837 // If this is an EXTLOAD, the kind of extension must match.
22838 (LLD->getExtensionType() != RLD->getExtensionType() &&
22839 // The only exception is if one of the extensions is anyext.
22840 LLD->getExtensionType() != ISD::EXTLOAD &&
22841 RLD->getExtensionType() != ISD::EXTLOAD) ||
22842 // FIXME: this discards src value information. This is
22843 // over-conservative. It would be beneficial to be able to remember
22844 // both potential memory locations. Since we are discarding
22845 // src value info, don't do the transformation if the memory
22846 // locations are not in the default address space.
22847 LLD->getPointerInfo().getAddrSpace() != 0 ||
22848 RLD->getPointerInfo().getAddrSpace() != 0 ||
22849 // We can't produce a CMOV of a TargetFrameIndex since we won't
22850 // generate the address generation required.
22851 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22852 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22853 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22854 LLD->getBasePtr().getValueType()))
22855 return false;
22857 // The loads must not depend on one another.
22858 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22859 return false;
22861 // Check that the select condition doesn't reach either load. If so,
22862 // folding this will induce a cycle into the DAG. If not, this is safe to
22863 // xform, so create a select of the addresses.
22865 SmallPtrSet<const SDNode *, 32> Visited;
22866 SmallVector<const SDNode *, 16> Worklist;
22868 // Always fail if LLD and RLD are not independent. TheSelect is a
22869 // predecessor to all Nodes in question so we need not search past it.
22871 Visited.insert(TheSelect);
22872 Worklist.push_back(LLD);
22873 Worklist.push_back(RLD);
22875 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22876 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22877 return false;
22879 SDValue Addr;
22880 if (TheSelect->getOpcode() == ISD::SELECT) {
22881 // We cannot do this optimization if any pair of {RLD, LLD} is a
22882 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22883 // Loads, we only need to check if CondNode is a successor to one of the
22884 // loads. We can further avoid this if there's no use of their chain
22885 // value.
22886 SDNode *CondNode = TheSelect->getOperand(0).getNode();
22887 Worklist.push_back(CondNode);
22889 if ((LLD->hasAnyUseOfValue(1) &&
22890 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22891 (RLD->hasAnyUseOfValue(1) &&
22892 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22893 return false;
22895 Addr = DAG.getSelect(SDLoc(TheSelect),
22896 LLD->getBasePtr().getValueType(),
22897 TheSelect->getOperand(0), LLD->getBasePtr(),
22898 RLD->getBasePtr());
22899 } else { // Otherwise SELECT_CC
22900 // We cannot do this optimization if any pair of {RLD, LLD} is a
22901 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22902 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22903 // one of the loads. We can further avoid this if there's no use of their
22904 // chain value.
22906 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22907 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22908 Worklist.push_back(CondLHS);
22909 Worklist.push_back(CondRHS);
22911 if ((LLD->hasAnyUseOfValue(1) &&
22912 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22913 (RLD->hasAnyUseOfValue(1) &&
22914 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22915 return false;
22917 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22918 LLD->getBasePtr().getValueType(),
22919 TheSelect->getOperand(0),
22920 TheSelect->getOperand(1),
22921 LLD->getBasePtr(), RLD->getBasePtr(),
22922 TheSelect->getOperand(4));
22925 SDValue Load;
22926 // It is safe to replace the two loads if they have different alignments,
22927 // but the new load must be the minimum (most restrictive) alignment of the
22928 // inputs.
22929 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22930 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22931 if (!RLD->isInvariant())
22932 MMOFlags &= ~MachineMemOperand::MOInvariant;
22933 if (!RLD->isDereferenceable())
22934 MMOFlags &= ~MachineMemOperand::MODereferenceable;
22935 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22936 // FIXME: Discards pointer and AA info.
22937 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22938 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22939 MMOFlags);
22940 } else {
22941 // FIXME: Discards pointer and AA info.
22942 Load = DAG.getExtLoad(
22943 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22944 : LLD->getExtensionType(),
22945 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22946 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22949 // Users of the select now use the result of the load.
22950 CombineTo(TheSelect, Load);
22952 // Users of the old loads now use the new load's chain. We know the
22953 // old-load value is dead now.
22954 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22955 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22956 return true;
22959 return false;
22962 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22963 /// bitwise 'and'.
22964 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22965 SDValue N1, SDValue N2, SDValue N3,
22966 ISD::CondCode CC) {
22967 // If this is a select where the false operand is zero and the compare is a
22968 // check of the sign bit, see if we can perform the "gzip trick":
22969 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22970 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
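// For instance, with i32 values, (sra X, 31) is all-ones when X is negative
// and zero otherwise, so
//   select_cc setlt X, 0, A, 0  ==  and (sra X, 31), A
// while the setgt form additionally inverts that mask (the 'not' above).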
22971 EVT XType = N0.getValueType();
22972 EVT AType = N2.getValueType();
22973 if (!isNullConstant(N3) || !XType.bitsGE(AType))
22974 return SDValue();
22976 // If the comparison is testing for a positive value, we have to invert
22977 // the sign bit mask, so only do that transform if the target has a bitwise
22978 // 'and not' instruction (the invert is free).
22979 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22980 // (X > -1) ? A : 0
22981 // (X > 0) ? X : 0 <-- This is canonical signed max.
22982 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22983 return SDValue();
22984 } else if (CC == ISD::SETLT) {
22985 // (X < 0) ? A : 0
22986 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
22987 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22988 return SDValue();
22989 } else {
22990 return SDValue();
22993 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22994 // constant.
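// For example, in the setlt form with an i32 X and A == 8 (only bit 3 set),
// ShCt is 32 - 3 - 1 == 28 and the fold produces (and (srl X, 28), 8): bit 31
// of X lands in bit 3 of the shifted value, so the result is 8 when X is
// negative and 0 otherwise.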
22995 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22996 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22997 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22998 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22999 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23000 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23001 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23002 AddToWorklist(Shift.getNode());
23004 if (XType.bitsGT(AType)) {
23005 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23006 AddToWorklist(Shift.getNode());
23009 if (CC == ISD::SETGT)
23010 Shift = DAG.getNOT(DL, Shift, AType);
23012 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23016 unsigned ShCt = XType.getSizeInBits() - 1;
23017 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23018 return SDValue();
23020 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23021 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23022 AddToWorklist(Shift.getNode());
23024 if (XType.bitsGT(AType)) {
23025 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23026 AddToWorklist(Shift.getNode());
23029 if (CC == ISD::SETGT)
23030 Shift = DAG.getNOT(DL, Shift, AType);
23032 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23035 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
23036 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23037 SDValue N0 = N->getOperand(0);
23038 SDValue N1 = N->getOperand(1);
23039 SDValue N2 = N->getOperand(2);
23040 EVT VT = N->getValueType(0);
23041 SDLoc DL(N);
23043 unsigned BinOpc = N1.getOpcode();
23044 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23045 return SDValue();
23047 // The use checks are intentionally on SDNode because we may be dealing
23048 // with opcodes that produce more than one SDValue.
23049 // TODO: Do we really need to check N0 (the condition operand of the select)?
23050 // But removing that clause could cause an infinite loop...
23051 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23052 return SDValue();
23054 // Binops may include opcodes that return multiple values, so all values
23055 // must be created/propagated from the newly created binops below.
23056 SDVTList OpVTs = N1->getVTList();
23058 // Fold select(cond, binop(x, y), binop(z, y))
23059 // --> binop(select(cond, x, z), y)
23060 if (N1.getOperand(1) == N2.getOperand(1)) {
23061 SDValue NewSel =
23062 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23063 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
23064 NewBinOp->setFlags(N1->getFlags());
23065 NewBinOp->intersectFlagsWith(N2->getFlags());
23066 return NewBinOp;
23069 // Fold select(cond, binop(x, y), binop(x, z))
23070 // --> binop(x, select(cond, y, z))
23071 // Second op VT might be different (e.g. shift amount type)
23072 if (N1.getOperand(0) == N2.getOperand(0) &&
23073 VT == N1.getOperand(1).getValueType() &&
23074 VT == N2.getOperand(1).getValueType()) {
23075 SDValue NewSel =
23076 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
23077 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
23078 NewBinOp->setFlags(N1->getFlags());
23079 NewBinOp->intersectFlagsWith(N2->getFlags());
23080 return NewBinOp;
23083 // TODO: Handle isCommutativeBinOp patterns as well?
23084 return SDValue();
23087 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
23088 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
23089 SDValue N0 = N->getOperand(0);
23090 EVT VT = N->getValueType(0);
23091 bool IsFabs = N->getOpcode() == ISD::FABS;
23092 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
23094 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
23095 return SDValue();
23097 SDValue Int = N0.getOperand(0);
23098 EVT IntVT = Int.getValueType();
23100 // The operand to cast should be integer.
23101 if (!IntVT.isInteger() || IntVT.isVector())
23102 return SDValue();
23104 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
23105 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
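// Concretely, for a scalar f32 bitcast from i32 this becomes:
//   fneg: (bitconvert (xor x, 0x80000000))
//   fabs: (bitconvert (and x, 0x7FFFFFFF))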
23106 APInt SignMask;
23107 if (N0.getValueType().isVector()) {
23108 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
23109 // 0x7f...) per element and splat it.
23110 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
23111 if (IsFabs)
23112 SignMask = ~SignMask;
23113 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
23114 } else {
23115 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
23116 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
23117 if (IsFabs)
23118 SignMask = ~SignMask;
23120 SDLoc DL(N0);
23121 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
23122 DAG.getConstant(SignMask, DL, IntVT));
23123 AddToWorklist(Int.getNode());
23124 return DAG.getBitcast(VT, Int);
23127 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
23128 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
23129 /// in it. This may be a win when the constant is not otherwise available
23130 /// because it replaces two constant pool loads with one.
23131 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
23132 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
23133 ISD::CondCode CC) {
23134 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
23135 return SDValue();
23137 // If we are before legalize types, we want the other legalization to happen
23138 // first (for example, to avoid messing with soft float).
23139 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
23140 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
23141 EVT VT = N2.getValueType();
23142 if (!TV || !FV || !TLI.isTypeLegal(VT))
23143 return SDValue();
23145 // If a constant can be materialized without loads, this does not make sense.
23146 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
23147 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
23148 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
23149 return SDValue();
23151 // If both constants have multiple uses, then we won't need to do an extra
23152 // load. The values are likely around in registers for other users.
23153 if (!TV->hasOneUse() && !FV->hasOneUse())
23154 return SDValue();
23156 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
23157 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
23158 Type *FPTy = Elts[0]->getType();
23159 const DataLayout &TD = DAG.getDataLayout();
23161 // Create a ConstantArray of the two constants.
23162 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
23163 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
23164 TD.getPrefTypeAlign(FPTy));
23165 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
23167 // Get offsets to the 0 and 1 elements of the array, so we can select between
23168 // them.
23169 SDValue Zero = DAG.getIntPtrConstant(0, DL);
23170 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
23171 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
23172 SDValue Cond =
23173 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
23174 AddToWorklist(Cond.getNode());
23175 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
23176 AddToWorklist(CstOffset.getNode());
23177 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
23178 AddToWorklist(CPIdx.getNode());
23179 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
23180 MachinePointerInfo::getConstantPool(
23181 DAG.getMachineFunction()), Alignment);
23184 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
23185 /// where 'cond' is the comparison specified by CC.
23186 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
23187 SDValue N2, SDValue N3, ISD::CondCode CC,
23188 bool NotExtCompare) {
23189 // (x ? y : y) -> y.
23190 if (N2 == N3) return N2;
23192 EVT CmpOpVT = N0.getValueType();
23193 EVT CmpResVT = getSetCCResultType(CmpOpVT);
23194 EVT VT = N2.getValueType();
23195 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
23196 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23197 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
23199 // Determine if the condition we're dealing with is constant.
23200 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
23201 AddToWorklist(SCC.getNode());
23202 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
23203 // fold select_cc true, x, y -> x
23204 // fold select_cc false, x, y -> y
23205 return !(SCCC->isZero()) ? N2 : N3;
23209 if (SDValue V =
23210 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
23211 return V;
23213 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
23214 return V;
23216 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
23217 // where y has a single bit set.
23218 // Put plainly: we can turn the SELECT_CC into an AND
23219 // when the condition can be materialized as an all-ones register. Any
23220 // single bit-test can be materialized as an all-ones register with
23221 // shift-left and shift-right-arith.
23222 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
23223 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
23224 SDValue AndLHS = N0->getOperand(0);
23225 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
23226 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
23227 // Shift the tested bit over the sign bit.
23228 const APInt &AndMask = ConstAndRHS->getAPIntValue();
23229 unsigned ShCt = AndMask.getBitWidth() - 1;
23230 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
23231 SDValue ShlAmt =
23232 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
23233 getShiftAmountTy(AndLHS.getValueType()));
23234 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
23236 // Now arithmetic right shift it all the way over, so the result is
23237 // either all-ones, or zero.
23238 SDValue ShrAmt =
23239 DAG.getConstant(ShCt, SDLoc(Shl),
23240 getShiftAmountTy(Shl.getValueType()));
23241 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
23243 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
23248 // fold select C, 16, 0 -> shl C, 4
23249 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
23250 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
23252 if ((Fold || Swap) &&
23253 TLI.getBooleanContents(CmpOpVT) ==
23254 TargetLowering::ZeroOrOneBooleanContent &&
23255 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
23257 if (Swap) {
23258 CC = ISD::getSetCCInverse(CC, CmpOpVT);
23259 std::swap(N2C, N3C);
23262 // If the caller doesn't want us to simplify this into a zext of a compare,
23263 // don't do it.
23264 if (NotExtCompare && N2C->isOne())
23265 return SDValue();
23267 SDValue Temp, SCC;
23268 // zext (setcc n0, n1)
23269 if (LegalTypes) {
23270 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
23271 if (VT.bitsLT(SCC.getValueType()))
23272 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
23273 else
23274 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23275 } else {
23276 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
23277 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23280 AddToWorklist(SCC.getNode());
23281 AddToWorklist(Temp.getNode());
23283 if (N2C->isOne())
23284 return Temp;
23286 unsigned ShCt = N2C->getAPIntValue().logBase2();
23287 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
23288 return SDValue();
23290 // shl setcc result by log2 n2c
23291 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
23292 DAG.getConstant(ShCt, SDLoc(Temp),
23293 getShiftAmountTy(Temp.getValueType())));
23296 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
23297 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
23298 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
23299 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
23300 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
23301 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
23302 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
23303 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
23304 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
23305 SDValue ValueOnZero = N2;
23306 SDValue Count = N3;
23308 // If the condition is NE instead of EQ, swap the operands.
23308 if (CC == ISD::SETNE)
23309 std::swap(ValueOnZero, Count);
23310 // Check if the value on zero is a constant equal to the bits in the type.
23311 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
23312 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
23313 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
23314 // legal, combine to just cttz.
23315 if ((Count.getOpcode() == ISD::CTTZ ||
23316 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
23317 N0 == Count.getOperand(0) &&
23318 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
23319 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
23320 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
23321 // legal, combine to just ctlz.
23322 if ((Count.getOpcode() == ISD::CTLZ ||
23323 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
23324 N0 == Count.getOperand(0) &&
23325 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
23326 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
23331 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
23332 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
23333 if (!NotExtCompare && N1C && N2C && N3C &&
23334 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
23335 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
23336 (N1C->isZero() && CC == ISD::SETLT)) &&
23337 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
23338 SDValue ASR = DAG.getNode(
23339 ISD::SRA, DL, CmpOpVT, N0,
23340 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
23341 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
23342 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
23345 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23346 return S;
23348 return SDValue();
23351 /// This is a stub for TargetLowering::SimplifySetCC.
23352 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
23353 ISD::CondCode Cond, const SDLoc &DL,
23354 bool foldBooleans) {
23355 TargetLowering::DAGCombinerInfo
23356 DagCombineInfo(DAG, Level, false, this);
23357 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
23360 /// Given an ISD::SDIV node expressing a divide by constant, return
23361 /// a DAG expression that will generate the same value by multiplying
23362 /// by a magic number.
23363 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23364 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
23365 // when optimising for minimum size, we don't want to expand a div to a mul
23366 // and a shift.
23367 if (DAG.getMachineFunction().getFunction().hasMinSize())
23368 return SDValue();
23370 SmallVector<SDNode *, 8> Built;
23371 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
23372 for (SDNode *N : Built)
23373 AddToWorklist(N);
23374 return S;
23377 return SDValue();
23380 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
23381 /// DAG expression that will generate the same value by right shifting.
23382 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
23383 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
23384 if (!C)
23385 return SDValue();
23387 // Avoid division by zero.
23388 if (C->isZero())
23389 return SDValue();
23391 SmallVector<SDNode *, 8> Built;
23392 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
23393 for (SDNode *N : Built)
23394 AddToWorklist(N);
23395 return S;
23398 return SDValue();
23401 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
23402 /// expression that will generate the same value by multiplying by a magic
23403 /// number.
23404 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23405 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
23406 // when optimising for minimum size, we don't want to expand a div to a mul
23407 // and a shift.
23408 if (DAG.getMachineFunction().getFunction().hasMinSize())
23409 return SDValue();
23411 SmallVector<SDNode *, 8> Built;
23412 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
23413 for (SDNode *N : Built)
23414 AddToWorklist(N);
23415 return S;
23418 return SDValue();
23421 /// Determines the LogBase2 value for a non-null input value using the
23422 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
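/// For example, for an i32 value V == 16: ctlz(16) == 27, and
/// (32 - 1) - 27 == 4 == log2(16).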
23423 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
23424 EVT VT = V.getValueType();
23425 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
23426 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
23427 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
23428 return LogBase2;
23431 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
23432 /// For the reciprocal, we need to find the zero of the function:
23433 /// F(X) = 1/X - A [which has a zero at X = 1/A]
23434 /// =>
23435 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
23436 /// does not require additional intermediate precision]
23437 /// For the last iteration, put numerator N into it to gain more precision:
23438 /// Result = N X_i + X_i (N - N A X_i)
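/// Numerically, for A = 4 and an initial estimate X_0 = 0.3:
///   X_1 = 0.3 * (2 - 4 * 0.3) = 0.24,  X_2 = 0.24 * (2 - 4 * 0.24) = 0.2496,
/// converging quadratically towards 1/A = 0.25.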
23439 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
23440 SDNodeFlags Flags) {
23441 if (LegalDAG)
23442 return SDValue();
23444 // TODO: Handle extended types?
23445 EVT VT = Op.getValueType();
23446 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23447 VT.getScalarType() != MVT::f64)
23448 return SDValue();
23450 // If estimates are explicitly disabled for this function, we're done.
23451 MachineFunction &MF = DAG.getMachineFunction();
23452 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
23453 if (Enabled == TLI.ReciprocalEstimate::Disabled)
23454 return SDValue();
23456 // Estimates may be explicitly enabled for this type with a custom number of
23457 // refinement steps.
23458 int Iterations = TLI.getDivRefinementSteps(VT, MF);
23459 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
23460 AddToWorklist(Est.getNode());
23462 SDLoc DL(Op);
23463 if (Iterations) {
23464 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
23466 // Newton iterations: Est = Est + Est (N - Arg * Est)
23467 // If this is the last iteration, also multiply by the numerator.
23468 for (int i = 0; i < Iterations; ++i) {
23469 SDValue MulEst = Est;
23471 if (i == Iterations - 1) {
23472 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
23473 AddToWorklist(MulEst.getNode());
23476 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
23477 AddToWorklist(NewEst.getNode());
23479 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
23480 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
23481 AddToWorklist(NewEst.getNode());
23483 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23484 AddToWorklist(NewEst.getNode());
23486 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
23487 AddToWorklist(Est.getNode());
23489 } else {
23490 // If no iterations are available, multiply with N.
23491 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23492 AddToWorklist(Est.getNode());
23495 return Est;
23498 return SDValue();
23501 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
23502 /// For the reciprocal sqrt, we need to find the zero of the function:
23503 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23504 /// =>
23505 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23506 /// As a result, we precompute A/2 prior to the iteration loop.
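/// Numerically, for A = 4 and an initial estimate X_0 = 0.6:
///   X_1 = 0.6 * (1.5 - (4 * 0.6 * 0.6) / 2) = 0.468,
/// converging towards 1/sqrt(A) = 0.5.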
23507 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23508 unsigned Iterations,
23509 SDNodeFlags Flags, bool Reciprocal) {
23510 EVT VT = Arg.getValueType();
23511 SDLoc DL(Arg);
23512 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23514 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23515 // this entire sequence requires only one FP constant.
23516 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23517 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23519 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23520 for (unsigned i = 0; i < Iterations; ++i) {
23521 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23522 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23523 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23524 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23527 // If non-reciprocal square root is requested, multiply the result by Arg.
23528 if (!Reciprocal)
23529 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23531 return Est;
23532 }
23534 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23535 /// For the reciprocal sqrt, we need to find the zero of the function:
23536 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23537 /// =>
23538 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
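// This is algebraically the same update as the one-constant form above:
//   (-0.5 * X_i) * (A * X_i^2 - 3.0) = X_i * (1.5 - 0.5 * A * X_i^2).
// It is merely re-associated so that both constants (-0.5 and -3.0) are plain
// operands and the A * X_i product is exposed for reuse below.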
23539 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23540 unsigned Iterations,
23541 SDNodeFlags Flags, bool Reciprocal) {
23542 EVT VT = Arg.getValueType();
23543 SDLoc DL(Arg);
23544 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23545 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23547 // This routine must enter the loop below to work correctly
23548 // when (Reciprocal == false).
23549 assert(Iterations > 0);
23551 // Newton iterations for reciprocal square root:
23552 // E = (E * -0.5) * ((A * E) * E + -3.0)
23553 for (unsigned i = 0; i < Iterations; ++i) {
23554 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23555 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23556 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23558 // When calculating a square root, the last iteration builds:
23559 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
23560 // (note the common subexpression A * E)
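// This follows from sqrt(A) = A * rsqrt(A): multiplying the RSQRT update
// (E * -0.5) * ((A * E) * E + -3.0) by A is the same as replacing the leading
// E with (A * E), so the A * E product already computed for the right-hand
// side is reused on the left.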
23561 SDValue LHS;
23562 if (Reciprocal || (i + 1) < Iterations) {
23563 // RSQRT: LHS = (E * -0.5)
23564 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23565 } else {
23566 // SQRT: LHS = (A * E) * -0.5
23567 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23568 }
23570 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23571 }
23573 return Est;
23574 }
23576 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23577 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23578 /// Op can be zero.
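// Concretely: an estimate of rsqrt(0.0) is +Inf, so Op * rsqrt(Op) evaluates
// to 0.0 * Inf = NaN instead of the expected sqrt(0.0) = 0.0, and denormal
// inputs can be similarly far off; the select built at the end of this
// function substitutes the target-provided result for those inputs.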
23579 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23580 bool Reciprocal) {
23581 if (LegalDAG)
23582 return SDValue();
23584 // TODO: Handle extended types?
23585 EVT VT = Op.getValueType();
23586 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23587 VT.getScalarType() != MVT::f64)
23588 return SDValue();
23590 // If estimates are explicitly disabled for this function, we're done.
23591 MachineFunction &MF = DAG.getMachineFunction();
23592 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23593 if (Enabled == TLI.ReciprocalEstimate::Disabled)
23594 return SDValue();
23596 // Estimates may be explicitly enabled for this type with a custom number of
23597 // refinement steps.
23598 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23600 bool UseOneConstNR = false;
23601 if (SDValue Est =
23602 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23603 Reciprocal)) {
23604 AddToWorklist(Est.getNode());
23606 if (Iterations)
23607 Est = UseOneConstNR
23608 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
23609 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
23610 if (!Reciprocal) {
23611 SDLoc DL(Op);
23612 // Try the target specific test first.
23613 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
23615 // The estimate is now completely wrong if the input was exactly 0.0 or
23616 // possibly a denormal. Force the answer to 0.0 or the value provided by
23617 // the target for those cases.
23618 Est = DAG.getNode(
23619 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
23620 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
23621 }
23622 return Est;
23623 }
23625 return SDValue();
23626 }
23628 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23629 return buildSqrtEstimateImpl(Op, Flags, true);
23630 }
23632 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23633 return buildSqrtEstimateImpl(Op, Flags, false);
23634 }
23636 /// Return true if there is any possibility that the two addresses overlap.
23637 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
23639 struct MemUseCharacteristics {
23640 bool IsVolatile;
23641 bool IsAtomic;
23642 SDValue BasePtr;
23643 int64_t Offset;
23644 Optional<int64_t> NumBytes;
23645 MachineMemOperand *MMO;
23646 };
23648 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23649 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23650 int64_t Offset = 0;
23651 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23652 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23653 ? C->getSExtValue()
23654 : (LSN->getAddressingMode() == ISD::PRE_DEC)
23655 ? -1 * C->getSExtValue()
23656 : 0;
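// For pre-indexed nodes the access itself happens at BasePtr +/- the constant
// increment, so fold it into Offset here; post-indexed and unindexed nodes
// access BasePtr directly, so their offset stays 0.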
23657 uint64_t Size =
23658 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23659 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23660 Offset /*base offset*/,
23661 Optional<int64_t>(Size),
23662 LSN->getMemOperand()};
23663 }
23664 if (const auto *LN = cast<LifetimeSDNode>(N))
23665 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23666 (LN->hasOffset()) ? LN->getOffset() : 0,
23667 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23668 : Optional<int64_t>(),
23669 (MachineMemOperand *)nullptr};
23670 // Default.
23671 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23672 (int64_t)0 /*offset*/,
23673 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23674 };
23676 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23677 MUC1 = getCharacteristics(Op1);
23679 // If they are to the same address, then they must be aliases.
23680 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23681 MUC0.Offset == MUC1.Offset)
23682 return true;
23684 // If they are both volatile then they cannot be reordered.
23685 if (MUC0.IsVolatile && MUC1.IsVolatile)
23686 return true;
23688 // Be conservative about atomics for the moment
23689 // TODO: This is way overconservative for unordered atomics (see D66309)
23690 if (MUC0.IsAtomic && MUC1.IsAtomic)
23691 return true;
23693 if (MUC0.MMO && MUC1.MMO) {
23694 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23695 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23696 return false;
23697 }
23699 // Try to prove that there is aliasing, or that there is no aliasing. Either
23700 // way, we can return now. If nothing can be proved, proceed with more tests.
23701 bool IsAlias;
23702 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23703 DAG, IsAlias))
23704 return IsAlias;
23706 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23707 // either are not known.
23708 if (!MUC0.MMO || !MUC1.MMO)
23709 return true;
23711 // If one operation reads from invariant memory and the other may store, they
23712 // cannot alias. These should really be checking the equivalent of mayWrite,
23713 // but it only matters for memory nodes other than load/store.
23714 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23715 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23716 return false;
23718 // If we know that SrcValue1 and SrcValue2 have relatively large alignment
23719 // compared to the size and offset of the access, we may be able
23720 // to prove they do not alias. This check is conservative for now to catch
23721 // cases created by splitting vector types; it only works when the offsets are
23722 // multiples of the size of the data.
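// Illustrative case this catches: two 4-byte accesses on the same 8-byte
// aligned base, at offsets 0 and 4. Within the 8-byte alignment window they
// occupy [0, 4) and [4, 8), which do not overlap, so they cannot alias.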
23723 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23724 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23725 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23726 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23727 auto &Size0 = MUC0.NumBytes;
23728 auto &Size1 = MUC1.NumBytes;
23729 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23730 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23731 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23732 SrcValOffset1 % *Size1 == 0) {
23733 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23734 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23736 // There is no overlap between these relatively aligned accesses of
23737 // similar size. Return no alias.
23738 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23739 return false;
23740 }
23742 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23743 ? CombinerGlobalAA
23744 : DAG.getSubtarget().useAA();
23745 #ifndef NDEBUG
23746 if (CombinerAAOnlyFunc.getNumOccurrences() &&
23747 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23748 UseAA = false;
23749 #endif
23751 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23752 Size0.hasValue() && Size1.hasValue()) {
23753 // Use alias analysis information.
23754 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23755 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23756 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
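// The sizes passed to AA are deliberately conservative: each one spans from
// the smaller of the two source-value offsets up to that access's own end,
// so a NoAlias answer also holds for the original, smaller accesses.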
23757 if (AA->isNoAlias(
23758 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23759 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23760 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23761 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23762 return false;
23763 }
23765 // Otherwise we have to assume they alias.
23766 return true;
23767 }
23769 /// Walk up chain skipping non-aliasing memory nodes,
23770 /// looking for aliasing nodes and adding them to the Aliases vector.
23771 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23772 SmallVectorImpl<SDValue> &Aliases) {
23773 SmallVector<SDValue, 8> Chains; // List of chains to visit.
23774 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
23776 // Get alias information for node.
23777 // TODO: relax aliasing for unordered atomics (see D66309)
23778 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23780 // Starting off.
23781 Chains.push_back(OriginalChain);
23782 unsigned Depth = 0;
23784 // Attempt to improve chain by a single step
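// On success the lambda returns true and rewrites C to the next chain to
// follow (or to a null SDValue once the entry token is reached); returning
// false means C could not be stepped past and the caller records it as an
// alias.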
23785 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23786 switch (C.getOpcode()) {
23787 case ISD::EntryToken:
23788 // No need to mark EntryToken.
23789 C = SDValue();
23790 return true;
23791 case ISD::LOAD:
23792 case ISD::STORE: {
23793 // Get alias information for C.
23794 // TODO: Relax aliasing for unordered atomics (see D66309)
23795 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23796 cast<LSBaseSDNode>(C.getNode())->isSimple();
23797 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
23798 // Look further up the chain.
23799 C = C.getOperand(0);
23800 return true;
23801 }
23802 // Alias, so stop here.
23803 return false;
23804 }
23806 case ISD::CopyFromReg:
23807 // Always forward past CopyFromReg.
23808 C = C.getOperand(0);
23809 return true;
23811 case ISD::LIFETIME_START:
23812 case ISD::LIFETIME_END: {
23813 // We can forward past any lifetime start/end that can be proven not to
23814 // alias the memory access.
23815 if (!mayAlias(N, C.getNode())) {
23816 // Look further up the chain.
23817 C = C.getOperand(0);
23818 return true;
23819 }
23820 return false;
23821 }
23822 default:
23823 return false;
23824 }
23825 };
23827 // Look at each chain and determine if it is an alias. If so, add it to the
23828 // aliases list. If not, then continue up the chain looking for the next
23829 // candidate.
23830 while (!Chains.empty()) {
23831 SDValue Chain = Chains.pop_back_val();
23833 // Don't bother if we've seen Chain before.
23834 if (!Visited.insert(Chain.getNode()).second)
23835 continue;
23837 // For TokenFactor nodes, look at each operand and only continue up the
23838 // chain until we reach the depth limit.
23840 // FIXME: The depth check could be made to return the last non-aliasing
23841 // chain we found before we hit a tokenfactor rather than the original
23842 // chain.
23843 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23844 Aliases.clear();
23845 Aliases.push_back(OriginalChain);
23846 return;
23847 }
23849 if (Chain.getOpcode() == ISD::TokenFactor) {
23850 // We have to check each of the operands of the token factor for "small"
23851 // token factors, so we queue them up. Adding the operands to the queue
23852 // (stack) in reverse order maintains the original order and increases the
23853 // likelihood that getNode will find a matching token factor (CSE).
23854 if (Chain.getNumOperands() > 16) {
23855 Aliases.push_back(Chain);
23856 continue;
23857 }
23858 for (unsigned n = Chain.getNumOperands(); n;)
23859 Chains.push_back(Chain.getOperand(--n));
23860 ++Depth;
23861 continue;
23862 }
23863 // Everything else
23864 if (ImproveChain(Chain)) {
23865 // An improved chain was found; consider the new chain if one exists.
23866 if (Chain.getNode())
23867 Chains.push_back(Chain);
23868 ++Depth;
23869 continue;
23870 }
23871 // No improved chain is possible, so treat it as an alias.
23872 Aliases.push_back(Chain);
23873 }
23874 }
23876 /// Walk up the chain, skipping non-aliasing memory nodes, looking for a better
23877 /// chain (aliasing node).
23878 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
23879 if (OptLevel == CodeGenOpt::None)
23880 return OldChain;
23882 // Ops for replacing token factor.
23883 SmallVector<SDValue, 8> Aliases;
23885 // Accumulate all the aliases to this node.
23886 GatherAllAliases(N, OldChain, Aliases);
23888 // If no operands then chain to entry token.
23889 if (Aliases.size() == 0)
23890 return DAG.getEntryNode();
23892 // If a single operand then chain to it. We don't need to revisit it.
23893 if (Aliases.size() == 1)
23894 return Aliases[0];
23896 // Construct a custom tailored token factor.
23897 return DAG.getTokenFactor(SDLoc(N), Aliases);
23898 }
23900 namespace {
23901 // TODO: Replace with std::monostate when we move to C++17.
23902 struct UnitT { } Unit;
23903 bool operator==(const UnitT &, const UnitT &) { return true; }
23904 bool operator!=(const UnitT &, const UnitT &) { return false; }
23905 } // namespace
23907 // This function tries to collect a bunch of potentially interesting
23908 // nodes to improve the chains of, all at once. This might seem
23909 // redundant, as this function gets called when visiting every store
23910 // node, so why not let the work be done on each store as it's visited?
23912 // I believe this is mainly important because mergeConsecutiveStores
23913 // is unable to deal with merging stores of different sizes, so unless
23914 // we improve the chains of all the potential candidates up-front
23915 // before running mergeConsecutiveStores, it might only see some of
23916 // the nodes that will eventually be candidates, and then not be able
23917 // to go from a partially-merged state to the desired final
23918 // fully-merged state.
23920 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23921 SmallVector<StoreSDNode *, 8> ChainedStores;
23922 StoreSDNode *STChain = St;
23923 // Intervals records which offsets from BaseIndex have been covered. In
23924 // the common case, every store writes to the immediately preceding address,
23925 // and is thus merged with the previous interval at insertion time.
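// Illustrative shape: with half-open intervals, a 4-byte store at offset 0
// occupies [0, 4); a later 4-byte store at offset 4 inserts [4, 8), which
// coalesces with it, so a gap-free run of stores is represented by a single
// interval.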
23927 using IMap =
23928 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23929 IMap::Allocator A;
23930 IMap Intervals(A);
23932 // This holds the base pointer, index, and the offset in bytes from the base
23933 // pointer.
23934 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23936 // We must have a base and an offset.
23937 if (!BasePtr.getBase().getNode())
23938 return false;
23940 // Do not handle stores to undef base pointers.
23941 if (BasePtr.getBase().isUndef())
23942 return false;
23944 // Do not handle stores to opaque types
23945 if (St->getMemoryVT().isZeroSized())
23946 return false;
23948 // BaseIndexOffset assumes that offsets are fixed-size, which
23949 // is not valid for scalable vectors where the offsets are
23950 // scaled by `vscale`, so bail out early.
23951 if (St->getMemoryVT().isScalableVector())
23952 return false;
23954 // Add ST's interval.
23955 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
23957 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23958 if (Chain->getMemoryVT().isScalableVector())
23959 return false;
23961 // If the chain has more than one use, then we can't reorder the mem ops.
23962 if (!SDValue(Chain, 0)->hasOneUse())
23963 break;
23964 // TODO: Relax for unordered atomics (see D66309)
23965 if (!Chain->isSimple() || Chain->isIndexed())
23966 break;
23968 // Find the base pointer and offset for this memory node.
23969 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23970 // Check that the base pointer is the same as the original one.
23971 int64_t Offset;
23972 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23973 break;
23974 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23975 // Make sure we don't overlap with other intervals by checking the ones to
23976 // the left or right before inserting.
23977 auto I = Intervals.find(Offset);
23978 // If there's a next interval, we should end before it.
23979 if (I != Intervals.end() && I.start() < (Offset + Length))
23980 break;
23981 // If there's a previous interval, we should start after it.
23982 if (I != Intervals.begin() && (--I).stop() <= Offset)
23983 break;
23984 Intervals.insert(Offset, Offset + Length, Unit);
23986 ChainedStores.push_back(Chain);
23987 STChain = Chain;
23988 }
23990 // If we didn't find a chained store, exit.
23991 if (ChainedStores.size() == 0)
23992 return false;
23994 // Improve all chained stores (St and ChainedStores members) starting from
23995 // where the store chain ended and return single TokenFactor.
23996 SDValue NewChain = STChain->getChain();
23997 SmallVector<SDValue, 8> TFOps;
23998 for (unsigned I = ChainedStores.size(); I;) {
23999 StoreSDNode *S = ChainedStores[--I];
24000 SDValue BetterChain = FindBetterChain(S, NewChain);
24001 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
24002 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24003 TFOps.push_back(SDValue(S, 0));
24004 ChainedStores[I] = S;
24005 }
24007 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24008 SDValue BetterChain = FindBetterChain(St, NewChain);
24009 SDValue NewST;
24010 if (St->isTruncatingStore())
24011 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24012 St->getBasePtr(), St->getMemoryVT(),
24013 St->getMemOperand());
24014 else
24015 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24016 St->getBasePtr(), St->getMemOperand());
24018 TFOps.push_back(NewST);
24020 // If we improved every element of TFOps, then we've lost the dependence on
24021 // NewChain to successors of St and we need to add it back to TFOps. Do so at
24022 // the beginning to keep relative order consistent with FindBetterChains.
24023 auto hasImprovedChain = [&](SDValue ST) -> bool {
24024 return ST->getOperand(0) != NewChain;
24025 };
24026 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
24027 if (AddNewChain)
24028 TFOps.insert(TFOps.begin(), NewChain);
24030 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
24031 CombineTo(St, TF);
24033 // Add TF and its operands to the worklist.
24034 AddToWorklist(TF.getNode());
24035 for (const SDValue &Op : TF->ops())
24036 AddToWorklist(Op.getNode());
24037 AddToWorklist(STChain);
24038 return true;
24039 }
24041 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24042 if (OptLevel == CodeGenOpt::None)
24043 return false;
24045 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24047 // We must have a base and an offset.
24048 if (!BasePtr.getBase().getNode())
24049 return false;
24051 // Do not handle stores to undef base pointers.
24052 if (BasePtr.getBase().isUndef())
24053 return false;
24055 // Directly improve a chain of disjoint stores starting at St.
24056 if (parallelizeChainedStores(St))
24057 return true;
24059 // Improve St's chain.
24060 SDValue BetterChain = FindBetterChain(St, St->getChain());
24061 if (St->getChain() != BetterChain) {
24062 replaceStoreChain(St, BetterChain);
24063 return true;
24064 }
24065 return false;
24066 }
24068 /// This is the entry point for the file.
24069 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
24070 CodeGenOpt::Level OptLevel) {
24071 /// This is the main entry point to this class.
24072 DAGCombiner(*this, AA, OptLevel).Run(Level);