//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates n-ary add expressions and eliminates the redundancy
// exposed by the reassociation.
//
// A motivating example:
//
//   void foo(int a, int b) {
//     bar(a + b);
//     bar((a + 2) + b);
//   }
//
// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
// the above code to
//
//   int t = a + b;
//   bar(t);
//   bar(t + 2);
//
// However, the Reassociate pass is unable to do that because it processes each
// instruction individually and believes (a + 2) + b is the best form according
// to its rank system.
//
// To address this limitation, NaryReassociate reassociates an expression in a
// form that reuses existing instructions. As a result, NaryReassociate can
// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
// (a + b) is computed before.
//
// NaryReassociate works as follows. For every instruction in the form of (a +
// b) + c, it checks whether a + c or b + c is already computed by a dominating
// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
// c) + a and removes the redundancy accordingly. To efficiently look up whether
// an expression has been computed before, we store each instruction seen and
// its SCEV into an SCEV-to-instruction map.
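//
// For instance, in the following illustrative snippet (not from an actual
// test), when the pass reaches %i3
//
//   %i1 = add i32 %a, %b        ; SeenExprs[SCEV(%a + %b)] = {%i1}
//   %i2 = add i32 %a, 2
//   %i3 = add i32 %i2, %b
//
// it queries the map for the SCEV of (%a + %b), finds the dominating %i1, and
// rewrites %i3 to 'add i32 %i1, 2'.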
//
// Although the algorithm pattern-matches only ternary additions, it
// automatically handles many >3-ary expressions by walking through the
// function in depth-first order. For example, given
//
//   (a + c) + d
//   ((a + b) + c) + d
//
// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
// ((a + c) + b) + d into ((a + c) + d) + b.
//
// Finally, the above dominator-based algorithm may need multiple iterations
// before emitting optimal code. One source of this need is that we only split
// an operand when it is used only once. The above algorithm can eliminate an
// instruction and decrease the usage count of its operands. As a result, an
// instruction that previously had multiple uses may become a single-use
// instruction and thus eligible for split consideration. For example,
//
//   ac = a + c
//   ab = a + b
//   abc = ab + c
//   ab2 = ab + b
//   ab2c = ab2 + c
//
// In the first iteration, we cannot reassociate abc to ac+b because ab is used
// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
// result, ab2 becomes dead and ab will be used only once in the second
// iteration. Then we can reassociate abc to ac+b.
//
// Limitations and TODO items:
//
// 1) We only consider n-ary adds, muls, GEPs, and min/max for now. This should
// be extended and generalized.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
#include <cstdint>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "nary-reassociate"
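
// To exercise this pass in isolation (for instance on a reduced test case),
// the new pass manager spelling below should work; exact flags may vary
// across LLVM releases:
//
//   opt -passes=nary-reassociate -S input.ll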

namespace {

class NaryReassociateLegacyPass : public FunctionPass {
public:
  static char ID;

  NaryReassociateLegacyPass() : FunctionPass(ID) {
    initializeNaryReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool doInitialization(Module &M) override {
    return false;
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
    AU.addPreserved<TargetLibraryInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }

private:
  NaryReassociatePass Impl;
};

} // end anonymous namespace

char NaryReassociateLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate",
                      "Nary reassociation", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(NaryReassociateLegacyPass, "nary-reassociate",
                    "Nary reassociation", false, false)

FunctionPass *llvm::createNaryReassociatePass() {
  return new NaryReassociateLegacyPass();
}

bool NaryReassociateLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  return Impl.runImpl(F, AC, DT, SE, TLI, TTI);
}
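
// New pass manager entry point. If nothing changed, all analyses are
// preserved; otherwise only CFG analyses and ScalarEvolution are marked
// preserved.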
PreservedAnalyses NaryReassociatePass::run(Function &F,
                                           FunctionAnalysisManager &AM) {
  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
  auto *TTI = &AM.getResult<TargetIRAnalysis>(F);

  if (!runImpl(F, AC, DT, SE, TLI, TTI))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<ScalarEvolutionAnalysis>();
  return PA;
}

bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_,
                                  DominatorTree *DT_, ScalarEvolution *SE_,
                                  TargetLibraryInfo *TLI_,
                                  TargetTransformInfo *TTI_) {
  AC = AC_;
  DT = DT_;
  SE = SE_;
  TLI = TLI_;
  TTI = TTI_;
  DL = &F.getDataLayout();

  bool Changed = false, ChangedInThisIteration;
  do {
    ChangedInThisIteration = doOneIteration(F);
    Changed |= ChangedInThisIteration;
  } while (ChangedInThisIteration);

  return Changed;
}
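
// One sweep over F in dominator-tree pre-order; returns true if any
// instruction was rewritten. Called repeatedly by runImpl until it reaches a
// fixed point.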
bool NaryReassociatePass::doOneIteration(Function &F) {
  bool Changed = false;
  SeenExprs.clear();
  // Process the basic blocks in a depth first traversal of the dominator
  // tree. This order ensures that all bases of a candidate are in Candidates
  // when we process it.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (Instruction &OrigI : *BB) {
      const SCEV *OrigSCEV = nullptr;
      if (Instruction *NewI = tryReassociate(&OrigI, OrigSCEV)) {
        Changed = true;
        OrigI.replaceAllUsesWith(NewI);

        // Add 'OrigI' to the list of dead instructions.
        DeadInsts.push_back(WeakTrackingVH(&OrigI));
        // Add the rewritten instruction to SeenExprs; the original
        // instruction is deleted.
        const SCEV *NewSCEV = SE->getSCEV(NewI);
        SeenExprs[NewSCEV].push_back(WeakTrackingVH(NewI));

        // Ideally, NewSCEV should equal OrigSCEV because tryReassociate(I)
        // is equivalent to I. However, ScalarEvolution::getSCEV may
        // weaken nsw, causing NewSCEV not to equal OrigSCEV. For example,
        // suppose we reassociate
        //   I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
        // to
        //   NewI = &a[sext(i)] + sext(j)
        //
        // ScalarEvolution computes
        //   getSCEV(I)    = a + 4 * sext(i + j)
        //   getSCEV(NewI) = a + 4 * sext(i) + 4 * sext(j)
        // which are different SCEVs.
        //
        // To alleviate this issue of ScalarEvolution not always capturing
        // equivalence, we add I to SeenExprs[OrigSCEV] as well, so that we can
        // map both the SCEV before and after tryReassociate(I) to I.
        //
        // This improvement is exercised in @reassociate_gep_nsw in
        // nary-gep.ll.
        if (NewSCEV != OrigSCEV)
          SeenExprs[OrigSCEV].push_back(WeakTrackingVH(NewI));
      } else if (OrigSCEV)
        SeenExprs[OrigSCEV].push_back(WeakTrackingVH(&OrigI));
    }
  }
  // Delete all dead instructions from 'DeadInsts'.
  // Note that ScalarEvolution is updated along the way.
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(
      DeadInsts, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); });

  return Changed;
}
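
// Try to match I as a signed/unsigned min/max idiom with predicate type PredT
// and reassociate it against a dominating min/max of the same kind. An
// illustrative sketch (pseudo IR, not from an actual test): given a
// dominating %t = smax(%a, %c), the expression
//
//   %v = smax(smax(%a, %b), %c)
//
// can be rewritten as smax(%t, %b), reusing %t.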
template <typename PredT>
Instruction *
NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I,
                                                 const SCEV *&OrigSCEV) {
  Value *LHS = nullptr;
  Value *RHS = nullptr;

  auto MinMaxMatcher =
      MaxMin_match<ICmpInst, bind_ty<Value>, bind_ty<Value>, PredT>(
          m_Value(LHS), m_Value(RHS));
  if (match(I, MinMaxMatcher)) {
    OrigSCEV = SE->getSCEV(I);
    if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
            tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS)))
      return NewMinMax;
    if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
            tryReassociateMinOrMax(I, MinMaxMatcher, RHS, LHS)))
      return NewMinMax;
  }
  return nullptr;
}

Instruction *NaryReassociatePass::tryReassociate(Instruction *I,
                                                 const SCEV *&OrigSCEV) {
  if (!SE->isSCEVable(I->getType()))
    return nullptr;

  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Mul:
    OrigSCEV = SE->getSCEV(I);
    return tryReassociateBinaryOp(cast<BinaryOperator>(I));
  case Instruction::GetElementPtr:
    OrigSCEV = SE->getSCEV(I);
    return tryReassociateGEP(cast<GetElementPtrInst>(I));
  default:
    break;
  }

  // Try to match signed/unsigned Min/Max.
  Instruction *ResI = nullptr;
  // TODO: Currently min/max reassociation is restricted to integer types only
  // due to use of SCEVExpander, which may introduce incompatible forms of
  // min/max for pointer types.
  if (I->getType()->isIntegerTy())
    if ((ResI = matchAndReassociateMinOrMax<umin_pred_ty>(I, OrigSCEV)) ||
        (ResI = matchAndReassociateMinOrMax<smin_pred_ty>(I, OrigSCEV)) ||
        (ResI = matchAndReassociateMinOrMax<umax_pred_ty>(I, OrigSCEV)) ||
        (ResI = matchAndReassociateMinOrMax<smax_pred_ty>(I, OrigSCEV)))
      return ResI;

  return nullptr;
}
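
// A GEP is considered foldable when TTI reports its cost as TCC_Free, meaning
// the target is expected to fold the address computation into the addressing
// mode of the memory instruction; reassociating such a GEP would not save a
// computation.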
static bool isGEPFoldable(GetElementPtrInst *GEP,
                          const TargetTransformInfo *TTI) {
  SmallVector<const Value *, 4> Indices(GEP->indices());
  return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
                         Indices) == TargetTransformInfo::TCC_Free;
}

Instruction *NaryReassociatePass::tryReassociateGEP(GetElementPtrInst *GEP) {
  // Not worth reassociating GEP if it is foldable.
  if (isGEPFoldable(GEP, TTI))
    return nullptr;

  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      if (auto *NewGEP =
              tryReassociateGEPAtIndex(GEP, I - 1, GTI.getIndexedType())) {
        return NewGEP;
      }
    }
  }
  return nullptr;
}

bool NaryReassociatePass::requiresSignExtension(Value *Index,
                                                GetElementPtrInst *GEP) {
  unsigned IndexSizeInBits =
      DL->getIndexSizeInBits(GEP->getType()->getPointerAddressSpace());
  return cast<IntegerType>(Index->getType())->getBitWidth() < IndexSizeInBits;
}
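
// Try to split the I-th index of GEP into an add and reuse a dominating GEP.
// An illustrative sketch (pseudo IR, not from a test): given a dominating
//
//   %p = getelementptr i32, ptr %a, i64 %i
//
// the instruction
//
//   %q = getelementptr i32, ptr %a, i64 (%i + %j)   ; index is an 'add'
//
// can be rewritten as '%q = getelementptr i32, ptr %p, i64 %j'.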
GetElementPtrInst *
NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Type *IndexedType) {
  SimplifyQuery SQ(*DL, DT, AC, GEP);
  Value *IndexToSplit = GEP->getOperand(I + 1);
  if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
    IndexToSplit = SExt->getOperand(0);
  } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
    // zext can be treated as sext if the source is non-negative.
    if (isKnownNonNegative(ZExt->getOperand(0), SQ))
      IndexToSplit = ZExt->getOperand(0);
  }

  if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
    // If the I-th index needs sext and the underlying add is not equipped with
    // nsw, we cannot split the add because
    //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
    if (requiresSignExtension(IndexToSplit, GEP) &&
        computeOverflowForSignedAdd(AO, SQ) != OverflowResult::NeverOverflows)
      return nullptr;

    Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
    // IndexToSplit = LHS + RHS.
    if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
      return NewGEP;
    // Symmetrically, try IndexToSplit = RHS + LHS.
    if (LHS != RHS) {
      if (auto *NewGEP =
              tryReassociateGEPAtIndex(GEP, I, RHS, LHS, IndexedType))
        return NewGEP;
    }
  }
  return nullptr;
}

GetElementPtrInst *
NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Value *LHS,
                                              Value *RHS, Type *IndexedType) {
  // Look for GEP's closest dominator that has the same SCEV as GEP except that
  // the I-th index is replaced with LHS.
  SmallVector<const SCEV *, 4> IndexExprs;
  for (Use &Index : GEP->indices())
    IndexExprs.push_back(SE->getSCEV(Index));
  // Replace the I-th index with LHS.
  IndexExprs[I] = SE->getSCEV(LHS);
  if (isKnownNonNegative(LHS, SimplifyQuery(*DL, DT, AC, GEP)) &&
      DL->getTypeSizeInBits(LHS->getType()).getFixedValue() <
          DL->getTypeSizeInBits(GEP->getOperand(I)->getType())
              .getFixedValue()) {
    // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
    // zext if the source operand is proved non-negative. We should do that
    // consistently so that CandidateExpr more likely appears before. See
    // @reassociate_gep_assume for an example of this canonicalization.
    IndexExprs[I] =
        SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
  }
  const SCEV *CandidateExpr =
      SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);

  Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
  if (Candidate == nullptr)
    return nullptr;

  IRBuilder<> Builder(GEP);
  // Candidate should have the same pointer type as GEP.
  assert(Candidate->getType() == GEP->getType());

  // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType).
  uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
  Type *ElementType = GEP->getResultElementType();
  uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
  // Another less rare case: because I is not necessarily the last index of the
  // GEP, the size of the type at the I-th index (IndexedSize) is not
  // necessarily divisible by ElementSize. For example,
  //
  //   #pragma pack(1)
  //   struct S {
  //     int a[3];
  //     int64 b[11];
  //   };
  //   #pragma pack()
  //
  // sizeof(S) = 100 is indivisible by sizeof(int64) = 8.
  //
  // TODO: bail out on this case for now. We could emit uglygep.
  if (IndexedSize % ElementSize != 0)
    return nullptr;

  // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0]))].
  Type *PtrIdxTy = DL->getIndexType(GEP->getType());
  if (RHS->getType() != PtrIdxTy)
    RHS = Builder.CreateSExtOrTrunc(RHS, PtrIdxTy);
  if (IndexedSize != ElementSize) {
    RHS = Builder.CreateMul(
        RHS, ConstantInt::get(PtrIdxTy, IndexedSize / ElementSize));
  }
  GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(
      Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS));
  NewGEP->setIsInBounds(GEP->isInBounds());
  NewGEP->takeName(GEP);
  return NewGEP;
}
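
// Entry point for reassociating add/mul. Both operand orders of the
// commutative operation I = LHS op RHS are tried below.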
Instruction *NaryReassociatePass::tryReassociateBinaryOp(BinaryOperator *I) {
  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
  // There is no need to reassociate 0.
  if (SE->getSCEV(I)->isZero())
    return nullptr;
  if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
    return NewI;
  if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
    return NewI;
  return nullptr;
}

Instruction *NaryReassociatePass::tryReassociateBinaryOp(Value *LHS, Value *RHS,
                                                         BinaryOperator *I) {
  Value *A = nullptr, *B = nullptr;
  // To be conservative, we reassociate I only when it is the only user of
  // (A op B).
  if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
    // I = (A op B) op RHS
    //   = (A op RHS) op B or (B op RHS) op A
    const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
    const SCEV *RHSExpr = SE->getSCEV(RHS);
    if (BExpr != RHSExpr) {
      if (auto *NewI =
              tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
        return NewI;
    }
    if (AExpr != RHSExpr) {
      if (auto *NewI =
              tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
        return NewI;
    }
  }
  return nullptr;
}

Instruction *NaryReassociatePass::tryReassociatedBinaryOp(const SCEV *LHSExpr,
                                                          Value *RHS,
                                                          BinaryOperator *I) {
  // Look for the closest dominator LHS of I that computes LHSExpr, and replace
  // I with LHS op RHS.
  auto *LHS = findClosestMatchingDominator(LHSExpr, I);
  if (LHS == nullptr)
    return nullptr;

  Instruction *NewI = nullptr;
  switch (I->getOpcode()) {
  case Instruction::Add:
    NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I->getIterator());
    break;
  case Instruction::Mul:
    NewI = BinaryOperator::CreateMul(LHS, RHS, "", I->getIterator());
    break;
  default:
    llvm_unreachable("Unexpected instruction.");
  }
  NewI->setDebugLoc(I->getDebugLoc());
  NewI->takeName(I);
  return NewI;
}

bool NaryReassociatePass::matchTernaryOp(BinaryOperator *I, Value *V,
                                         Value *&Op1, Value *&Op2) {
  switch (I->getOpcode()) {
  case Instruction::Add:
    return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
  case Instruction::Mul:
    return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
  default:
    llvm_unreachable("Unexpected instruction.");
  }
}

const SCEV *NaryReassociatePass::getBinarySCEV(BinaryOperator *I,
                                               const SCEV *LHS,
                                               const SCEV *RHS) {
  switch (I->getOpcode()) {
  case Instruction::Add:
    return SE->getAddExpr(LHS, RHS);
  case Instruction::Mul:
    return SE->getMulExpr(LHS, RHS);
  default:
    llvm_unreachable("Unexpected instruction.");
  }
}

Instruction *
NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
                                                  Instruction *Dominatee) {
  auto Pos = SeenExprs.find(CandidateExpr);
  if (Pos == SeenExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
  while (!Candidates.empty()) {
    // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's
    // removed during rewriting.
    if (Value *Candidate = Candidates.pop_back_val()) {
      Instruction *CandidateInstruction = cast<Instruction>(Candidate);
      if (!DT->dominates(CandidateInstruction, Dominatee))
        continue;

      // Make sure that the instruction is safe to reuse without introducing
      // poison.
      SmallVector<Instruction *> DropPoisonGeneratingInsts;
      if (!SE->canReuseInstruction(CandidateExpr, CandidateInstruction,
                                   DropPoisonGeneratingInsts))
        continue;

      for (Instruction *I : DropPoisonGeneratingInsts)
        I->dropPoisonGeneratingAnnotations();

      return CandidateInstruction;
    }
  }
  return nullptr;
}
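
// Maps a PatternMatch min/max predicate type to the corresponding SCEV node
// kind, e.g. smax_pred_ty -> scSMaxExpr.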
template <typename MaxMinT> static SCEVTypes convertToSCEVType(MaxMinT &MM) {
  if (std::is_same_v<smax_pred_ty, typename MaxMinT::PredType>)
    return scSMaxExpr;
  else if (std::is_same_v<umax_pred_ty, typename MaxMinT::PredType>)
    return scUMaxExpr;
  else if (std::is_same_v<smin_pred_ty, typename MaxMinT::PredType>)
    return scSMinExpr;
  else if (std::is_same_v<umin_pred_ty, typename MaxMinT::PredType>)
    return scUMinExpr;

  llvm_unreachable("Can't convert MinMax pattern to SCEV type");
}

// Parameters:
//  I - instruction matched by MaxMinMatch matcher
//  MaxMinMatch - min/max idiom matcher
//  LHS - first operand of I
//  RHS - second operand of I
template <typename MaxMinT>
Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
                                                   MaxMinT MaxMinMatch,
                                                   Value *LHS, Value *RHS) {
  Value *A = nullptr, *B = nullptr;
  MaxMinT m_MaxMin(m_Value(A), m_Value(B));

  if (LHS->hasNUsesOrMore(3) ||
      // The optimization is profitable only if LHS can be removed in the end.
      // In other words, LHS should be used (directly or indirectly) by I only.
      llvm::any_of(LHS->users(),
                   [&](auto *U) {
                     return U != I &&
                            !(U->hasOneUser() && *U->users().begin() == I);
                   }) ||
      !match(LHS, m_MaxMin))
    return nullptr;

  auto tryCombination = [&](Value *A, const SCEV *AExpr, Value *B,
                            const SCEV *BExpr, Value *C,
                            const SCEV *CExpr) -> Value * {
    SmallVector<const SCEV *, 2> Ops1{BExpr, AExpr};
    const SCEVTypes SCEVType = convertToSCEVType(m_MaxMin);
    const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);

    Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);
    if (!R1MinMax)
      return nullptr;

    LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax << "\n");

    SmallVector<const SCEV *, 2> Ops2{SE->getUnknown(C),
                                      SE->getUnknown(R1MinMax)};
    const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);

    SCEVExpander Expander(*SE, *DL, "nary-reassociate");
    Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
    NewMinMax->setName(Twine(I->getName()).concat(".nary"));

    LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
                      << "NARY: Inserting: " << *NewMinMax << "\n");
    return NewMinMax;
  };

  const SCEV *AExpr = SE->getSCEV(A);
  const SCEV *BExpr = SE->getSCEV(B);
  const SCEV *RHSExpr = SE->getSCEV(RHS);

  if (BExpr != RHSExpr) {
    // Try (A op RHS) op B.
    if (auto *NewMinMax = tryCombination(A, AExpr, RHS, RHSExpr, B, BExpr))
      return NewMinMax;
  }

  if (AExpr != RHSExpr) {
    // Try (RHS op B) op A.
    if (auto *NewMinMax = tryCombination(RHS, RHSExpr, B, BExpr, A, AExpr))
      return NewMinMax;
  }

  return nullptr;
}