//===- Reassociate.cpp - Reassociate binary expressions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates commutative expressions in an order that is designed
// to promote better constant propagation, GCSE, LICM, PRE, etc.
//
// For example: 4 + (x + 5) -> x + (4 + 5)
//
// In the implementation of this algorithm, constants are assigned rank = 0,
// function arguments are rank = 1, and other values are assigned ranks
// corresponding to the reverse post order traversal of current function
// (starting at 2), which effectively gives values in deep loops higher rank
// than values not in loops.
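//
// Illustrative sketch (editor's addition, not in the original header): if
// %inv is defined outside a loop and %iv inside it, then in
//   %t = add i32 %iv, %inv
// %iv gets the higher rank, so rank-sorting the operands groups low-rank,
// loop-invariant values together where LICM and GVN can pick them up.
//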
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace reassociate;
using namespace PatternMatch;

#define DEBUG_TYPE "reassociate"
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor, "Number of multiplies factored");
static cl::opt<bool>
    UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
                   cl::desc("Only reorder expressions within a basic block "
                            "when exposing CSE opportunities"),
                   cl::init(true), cl::Hidden);
/// Print out the expression identified in the Ops list.
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
  Module *M = I->getModule();
  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
         << *Ops[0].Op->getType() << '\t';
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    dbgs() << "[ ";
    Ops[i].Op->printAsOperand(dbgs(), false, M);
    dbgs() << ", #" << Ops[i].Rank << "] ";
  }
}
/// Utility class representing a non-constant Xor-operand. We classify
/// non-constant Xor-Operands into two categories:
///  C1) The operand is in the form "X & C", where C is a constant and C != ~0
///  C2) Any operand E which doesn't fall into C1
///  C2.1) The operand is in the form of "X | C", where C is a non-zero
///        constant.
///  C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
///        operand as "E | 0"
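///
/// A few illustrative instances (editor's sketch, assuming an i32 value x):
/// "x & 5" is C1, "x | 6" is C2.1, and a plain "x" is C2.2, treated as
/// "x | 0" with a zero constant part.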
class llvm::reassociate::XorOpnd {
public:
  XorOpnd(Value *V);

  bool isInvalid() const { return SymbolicPart == nullptr; }
  bool isOrExpr() const { return isOr; }
  Value *getValue() const { return OrigVal; }
  Value *getSymbolicPart() const { return SymbolicPart; }
  unsigned getSymbolicRank() const { return SymbolicRank; }
  const APInt &getConstPart() const { return ConstPart; }

  void Invalidate() { SymbolicPart = OrigVal = nullptr; }
  void setSymbolicRank(unsigned R) { SymbolicRank = R; }

private:
  Value *OrigVal;
  Value *SymbolicPart;
  APInt ConstPart;
  unsigned SymbolicRank;
  bool isOr;
};
XorOpnd::XorOpnd(Value *V) {
  assert(!isa<ConstantInt>(V) && "No ConstantInt");
  OrigVal = V;
  Instruction *I = dyn_cast<Instruction>(V);
  SymbolicRank = 0;

  if (I && (I->getOpcode() == Instruction::Or ||
            I->getOpcode() == Instruction::And)) {
    Value *V0 = I->getOperand(0);
    Value *V1 = I->getOperand(1);
    const APInt *C;
    if (match(V0, m_APInt(C)))
      std::swap(V0, V1);

    if (match(V1, m_APInt(C))) {
      ConstPart = *C;
      SymbolicPart = V0;
      isOr = (I->getOpcode() == Instruction::Or);
      return;
    }
  }

  // view the operand as "V | 0"
  SymbolicPart = V;
  ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
  isOr = true;
}
/// Return true if I is an instruction with the FastMathFlags that are needed
/// for general reassociation set. This is not the same as testing
/// Instruction::isAssociative() because it includes operations like fsub.
/// (This routine is only intended to be called for floating-point operations.)
static bool hasFPAssociativeFlags(Instruction *I) {
  assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
  return I->hasAllowReassoc() && I->hasNoSignedZeros();
}
/// Return true if V is an instruction of the specified opcode and if it
/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
  auto *BO = dyn_cast<BinaryOperator>(V);
  if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
    if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
      return BO;
  return nullptr;
}
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
                                        unsigned Opcode2) {
  auto *BO = dyn_cast<BinaryOperator>(V);
  if (BO && BO->hasOneUse() &&
      (BO->getOpcode() == Opcode1 || BO->getOpcode() == Opcode2))
    if (!isa<FPMathOperator>(BO) || hasFPAssociativeFlags(BO))
      return BO;
  return nullptr;
}
void ReassociatePass::BuildRankMap(Function &F,
                                   ReversePostOrderTraversal<Function *> &RPOT) {
  unsigned Rank = 2;

  // Assign distinct ranks to function arguments.
  for (auto &Arg : F.args()) {
    ValueRankMap[&Arg] = ++Rank;
    LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
                      << "\n");
  }

  // Traverse basic blocks in ReversePostOrder.
  for (BasicBlock *BB : RPOT) {
    unsigned BBRank = RankMap[BB] = ++Rank << 16;

    // Walk the basic block, adding precomputed ranks for any instructions that
    // we cannot move. This ensures that the ranks for these instructions are
    // all different in the block.
    for (Instruction &I : *BB)
      if (mayHaveNonDefUseDependency(I))
        ValueRankMap[&I] = ++BBRank;
  }
}
unsigned ReassociatePass::getRank(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
    return 0; // Otherwise it's a global or constant, rank 0.
  }

  if (unsigned Rank = ValueRankMap[I])
    return Rank; // Rank already known?

  // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
  // we can reassociate expressions for code motion! Since we do not recurse
  // for PHI nodes, we cannot have infinite recursion here, because there
  // cannot be loops in the value graph that do not go through PHI nodes.
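  // For instance (editor's illustration): for "%t = add i32 %a, %b" this
  // yields rank(%t) = 1 + max(rank(%a), rank(%b)), clamped to the rank of
  // %t's basic block.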
  unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
  for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
    Rank = std::max(Rank, getRank(I->getOperand(i)));

  // If this is a 'not' or 'neg' instruction, do not count it for rank. This
  // assures us that X and ~X will have the same rank.
  if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
      !match(I, m_FNeg(m_Value())))
    ++Rank;

  LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
                    << "\n");

  return ValueRankMap[I] = Rank;
}
// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
void ReassociatePass::canonicalizeOperands(Instruction *I) {
  assert(isa<BinaryOperator>(I) && "Expected binary operator.");
  assert(I->isCommutative() && "Expected commutative operator.");

  Value *LHS = I->getOperand(0);
  Value *RHS = I->getOperand(1);
  if (LHS == RHS || isa<Constant>(RHS))
    return;
  if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS))
    cast<BinaryOperator>(I)->swapOperands();
}
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
                                 BasicBlock::iterator InsertBefore,
                                 Value *FlagsOp) {
  if (S1->getType()->isIntOrIntVectorTy())
    return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);

  BinaryOperator *Res =
      BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
  Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
  return Res;
}
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
                                 BasicBlock::iterator InsertBefore,
                                 Value *FlagsOp) {
  if (S1->getType()->isIntOrIntVectorTy())
    return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);

  BinaryOperator *Res =
      BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
  Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
  return Res;
}
static Instruction *CreateNeg(Value *S1, const Twine &Name,
                              BasicBlock::iterator InsertBefore,
                              Value *FlagsOp) {
  if (S1->getType()->isIntOrIntVectorTy())
    return BinaryOperator::CreateNeg(S1, Name, InsertBefore);

  if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp))
    return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore);

  return UnaryOperator::CreateFNeg(S1, Name, InsertBefore);
}
/// Replace 0-X with X*-1.
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
  assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
         "Expected a Negate!");
  // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
  unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
  Type *Ty = Neg->getType();
  Constant *NegOne = Ty->isIntOrIntVectorTy() ?
    ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);

  BinaryOperator *Res =
      CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg->getIterator(), Neg);
  Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
  Res->takeName(Neg);
  Neg->replaceAllUsesWith(Res);
  Res->setDebugLoc(Neg->getDebugLoc());
  return Res;
}
using RepeatedValue = std::pair<Value *, uint64_t>;
/// Given an associative binary expression, return the leaf
/// nodes in Ops along with their weights (how many times the leaf occurs). The
/// original expression is the same as
///   (Ops[0].first op Ops[0].first op ... Ops[0].first)  <- Ops[0].second times
/// op
///   (Ops[1].first op Ops[1].first op ... Ops[1].first)  <- Ops[1].second times
/// op
///   ...
/// op
///   (Ops[N].first op Ops[N].first op ... Ops[N].first)  <- Ops[N].second times
///
/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
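///
/// For instance (editor's sketch): linearizing I = (X + X) + (Y + Y), where
/// the inner adds have no other uses, yields Ops = [(X, 2), (Y, 2)].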
///
/// This routine may modify the function, in which case it returns 'true'. The
/// changes it makes may well be destructive, changing the value computed by 'I'
/// to something completely different. Thus if the routine returns 'true' then
/// you MUST either replace I with a new expression computed from the Ops array,
/// or use RewriteExprTree to put the values back in.
///
/// A leaf node is either not a binary operation of the same kind as the root
/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
/// opcode), or is the same kind of binary operator but has a use which either
/// does not belong to the expression, or does belong to the expression but is
/// a leaf node. Every leaf node has at least one use that is a non-leaf node
/// of the expression, while for non-leaf nodes (except for the root 'I') every
/// use is a non-leaf node of the expression.
///
/// For example:
///
///            expression graph        node names
///
///                     +        |        I
///                    / \       |
///                   +   +      |      A,  B
///                  / \ / \     |
///                 *   +   *    |    C,  D,  E
///                    / \       |
///                   F   G      |      F,  G
///
/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
///
/// The expression is maximal: if some instruction is a binary operator of the
/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
/// then the instruction also belongs to the expression, is not a leaf node of
/// it, and its operands also belong to the expression (but may be leaf nodes).
///
/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
/// order to ensure that every non-root node in the expression has *exactly one*
/// use by a non-leaf node of the expression. This destruction means that the
/// caller MUST either replace 'I' with a new expression or use something like
/// RewriteExprTree to put the values back in if the routine indicates that it
/// made a change by returning 'true'.
///
/// In the above example either the right operand of A or the left operand of B
/// will be replaced by undef. If it is B's operand then this gives:
///
///                     +        |        I
///                    / \       |
///                   +   +      |      A,  B - operand of B replaced with undef
///                  / \   \     |
///                 *   +   *    |    C,  D,  E
///                    / \       |
///                   F   G      |      F,  G
///
/// Note that such undef operands can only be reached by passing through 'I'.
/// For example, if you visit operands recursively starting from a leaf node
/// then you will never see such an undef operand unless you get back to 'I',
/// which requires passing through a phi node.
///
/// Note that this routine may also mutate binary operators of the wrong type
/// that have all uses inside the expression (i.e. only used by non-leaf nodes
/// of the expression) if it can turn them into binary operators of the right
/// type and thus make the expression bigger.
static bool LinearizeExprTree(Instruction *I,
                              SmallVectorImpl<RepeatedValue> &Ops,
                              ReassociatePass::OrderedSet &ToRedo,
                              reassociate::OverflowTracking &Flags) {
  assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
         "Expected a UnaryOperator or BinaryOperator!");
  LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
  unsigned Opcode = I->getOpcode();
  assert(I->isAssociative() && I->isCommutative() &&
         "Expected an associative and commutative operation!");
  // Visit all operands of the expression, keeping track of their weight (the
  // number of paths from the expression root to the operand, or if you like
  // the number of times that operand occurs in the linearized expression).
  // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
  // while A has weight two.

  // Worklist of non-leaf nodes (their operands are in the expression too) along
  // with their weights, representing a certain number of paths to the operator.
  // If an operator occurs in the worklist multiple times then we found multiple
  // ways to get to it.
  SmallVector<std::pair<Instruction *, uint64_t>, 8> Worklist; // (Op, Weight)
  Worklist.push_back(std::make_pair(I, 1));
  bool Changed = false;

  // Leaves of the expression are values that either aren't the right kind of
  // operation (eg: a constant, or a multiply in an add tree), or are, but have
  // some uses that are not inside the expression. For example, in I = X + X,
  // X = A + B, the value X has two uses (by I) that are in the expression. If
  // X has any other uses, for example in a return instruction, then we consider
  // X to be a leaf, and won't analyze it further. When we first visit a value,
  // if it has more than one use then at first we conservatively consider it to
  // be a leaf. Later, as the expression is explored, we may discover some more
  // uses of the value from inside the expression. If all uses turn out to be
  // from within the expression (and the value is a binary operator of the right
  // kind) then the value is no longer considered to be a leaf, and its operands
  // are explored.

  // Leaves - Keeps track of the set of putative leaves as well as the number of
  // paths to each leaf seen so far.
  using LeafMap = DenseMap<Value *, uint64_t>;
  LeafMap Leaves; // Leaf -> Total weight so far.
  SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
  const DataLayout DL = I->getDataLayout();

#ifndef NDEBUG
  SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
#endif
  while (!Worklist.empty()) {
    // We examine the operands of this binary operator.
    auto [I, Weight] = Worklist.pop_back_val();

    if (isa<OverflowingBinaryOperator>(I)) {
      Flags.HasNUW &= I->hasNoUnsignedWrap();
      Flags.HasNSW &= I->hasNoSignedWrap();
    }

    for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
      Value *Op = I->getOperand(OpIdx);
      LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
      assert(!Op->use_empty() && "No uses, so how did we get to it?!");

      // If this is a binary operation of the right kind with only one use then
      // add its operands to the expression.
      if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
        assert(Visited.insert(Op).second && "Not first visit!");
        LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
        Worklist.push_back(std::make_pair(BO, Weight));
        continue;
      }

      // Appears to be a leaf. Is the operand already in the set of leaves?
      LeafMap::iterator It = Leaves.find(Op);
      if (It == Leaves.end()) {
        // Not in the leaf map. Must be the first time we saw this operand.
        assert(Visited.insert(Op).second && "Not first visit!");
        if (!Op->hasOneUse()) {
          // This value has uses not accounted for by the expression, so it is
          // not safe to modify. Mark it as being a leaf.
          LLVM_DEBUG(dbgs()
                     << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
          LeafOrder.push_back(Op);
          Leaves[Op] = Weight;
          continue;
        }
        // No uses outside the expression, try morphing it.
      } else {
        // Already in the leaf map.
        assert(It != Leaves.end() && Visited.count(Op) &&
               "In leaf map but not visited!");

        // Update the number of paths to the leaf.
        It->second += Weight;
        assert(It->second >= Weight && "Weight overflows");

        // If we still have uses that are not accounted for by the expression
        // then it is not safe to modify the value.
        if (!Op->hasOneUse())
          continue;

        // No uses outside the expression, try morphing it.
        Weight = It->second;
        Leaves.erase(It); // Since the value may be morphed below.
      }

      // At this point we have a value which, first of all, is not a binary
      // expression of the right kind, and secondly, is only used inside the
      // expression. This means that it can safely be modified. See if we
      // can usefully morph it into an expression of the right kind.
      assert((!isa<Instruction>(Op) ||
              cast<Instruction>(Op)->getOpcode() != Opcode
              || (isa<FPMathOperator>(Op) &&
                  !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
             "Should have been handled above!");
      assert(Op->hasOneUse() && "Has uses outside the expression tree!");
      // If this is a multiply expression, turn any internal negations into
      // multiplies by -1 so they can be reassociated. Add any users of the
      // newly created multiplication by -1 to the redo list, so any
      // reassociation opportunities that are exposed will be reassociated
      // further.
      Instruction *Neg;
      if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
           (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
          match(Op, m_Instruction(Neg))) {
        LLVM_DEBUG(dbgs()
                   << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
        Instruction *Mul = LowerNegateToMultiply(Neg);
        LLVM_DEBUG(dbgs() << *Mul << '\n');
        Worklist.push_back(std::make_pair(Mul, Weight));
        for (User *U : Mul->users()) {
          if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
            ToRedo.insert(UserBO);
        }
        ToRedo.insert(Neg);
        Changed = true;
        continue;
      }

      // Failed to morph into an expression of the right type. This really is
      // a leaf.
      LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
      assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
      LeafOrder.push_back(Op);
      Leaves[Op] = Weight;
    }
  }
  // The leaves, repeated according to their weights, represent the linearized
  // form of the expression.
  for (Value *V : LeafOrder) {
    LeafMap::iterator It = Leaves.find(V);
    if (It == Leaves.end())
      // Node initially thought to be a leaf wasn't.
      continue;
    assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
    uint64_t Weight = It->second;
    // Ensure the leaf is only output once.
    It->second = 0;
    Ops.push_back(std::make_pair(V, Weight));
    if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW)
      Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
    else if (Opcode == Instruction::Mul) {
      // To preserve NUW we need all inputs non-zero.
      // To preserve NSW we need all inputs strictly positive.
      if (Flags.AllKnownNonZero &&
          (Flags.HasNUW || (Flags.HasNSW && Flags.AllKnownNonNegative))) {
        Flags.AllKnownNonZero &= isKnownNonZero(V, SimplifyQuery(DL));
        if (Flags.HasNSW && Flags.AllKnownNonNegative)
          Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
      }
    }
  }
  // For nilpotent operations or addition there may be no operands, for example
  // because the expression was "X xor X" or consisted of 2^Bitwidth additions:
  // in both cases the weight reduces to 0 causing the value to be skipped.
  if (Ops.empty()) {
    Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
    assert(Identity && "Associative operation without identity!");
    Ops.emplace_back(Identity, 1);
  }

  return Changed;
}
/// Now that the operands for this expression tree are
/// linearized and optimized, emit them in-order.
void ReassociatePass::RewriteExprTree(BinaryOperator *I,
                                      SmallVectorImpl<ValueEntry> &Ops,
                                      OverflowTracking Flags) {
  assert(Ops.size() > 1 && "Single values should be used directly!");

  // Since our optimizations should never increase the number of operations, the
  // new expression can usually be written reusing the existing binary operators
  // from the original expression tree, without creating any new instructions,
  // though the rewritten expression may have a completely different topology.
  // We take care to not change anything if the new expression will be the same
  // as the original. If more than trivial changes (like commuting operands)
  // were made then we are obliged to clear out any optional subclass data like
  // nsw flags.

  /// NodesToRewrite - Nodes from the original expression available for writing
  /// the new expression into.
  SmallVector<BinaryOperator *, 8> NodesToRewrite;
  unsigned Opcode = I->getOpcode();
  BinaryOperator *Op = I;

  /// NotRewritable - The operands being written will be the leaves of the new
  /// expression and must not be used as inner nodes (via NodesToRewrite) by
  /// mistake. Inner nodes are always reassociable, and usually leaves are not
  /// (if they were they would have been incorporated into the expression and so
  /// would not be leaves), so most of the time there is no danger of this. But
  /// in rare cases a leaf may become reassociable if an optimization kills uses
  /// of it, or it may momentarily become reassociable during rewriting (below)
  /// due to it being removed as an operand of one of its uses. Ensure that
  /// misuse of leaf nodes as inner nodes cannot occur by remembering all of the
  /// future leaves and refusing to reuse any of them as inner nodes.
  SmallPtrSet<Value *, 8> NotRewritable;
  for (const ValueEntry &Op : Ops)
    NotRewritable.insert(Op.Op);

  // ExpressionChangedStart - Non-null if the rewritten expression differs from
  // the original in some non-trivial way, requiring the clearing of optional
  // flags. Flags are cleared from the operator in ExpressionChangedStart up to
  // ExpressionChangedEnd inclusive.
  BinaryOperator *ExpressionChangedStart = nullptr,
                 *ExpressionChangedEnd = nullptr;
609 // The last operation (which comes earliest in the IR) is special as both
610 // operands will come from Ops, rather than just one with the other being
612 if (i
+2 == Ops
.size()) {
613 Value
*NewLHS
= Ops
[i
].Op
;
614 Value
*NewRHS
= Ops
[i
+1].Op
;
615 Value
*OldLHS
= Op
->getOperand(0);
616 Value
*OldRHS
= Op
->getOperand(1);
618 if (NewLHS
== OldLHS
&& NewRHS
== OldRHS
)
619 // Nothing changed, leave it alone.
622 if (NewLHS
== OldRHS
&& NewRHS
== OldLHS
) {
623 // The order of the operands was reversed. Swap them.
624 LLVM_DEBUG(dbgs() << "RA: " << *Op
<< '\n');
626 LLVM_DEBUG(dbgs() << "TO: " << *Op
<< '\n');
632 // The new operation differs non-trivially from the original. Overwrite
633 // the old operands with the new ones.
634 LLVM_DEBUG(dbgs() << "RA: " << *Op
<< '\n');
635 if (NewLHS
!= OldLHS
) {
636 BinaryOperator
*BO
= isReassociableOp(OldLHS
, Opcode
);
637 if (BO
&& !NotRewritable
.count(BO
))
638 NodesToRewrite
.push_back(BO
);
639 Op
->setOperand(0, NewLHS
);
641 if (NewRHS
!= OldRHS
) {
642 BinaryOperator
*BO
= isReassociableOp(OldRHS
, Opcode
);
643 if (BO
&& !NotRewritable
.count(BO
))
644 NodesToRewrite
.push_back(BO
);
645 Op
->setOperand(1, NewRHS
);
647 LLVM_DEBUG(dbgs() << "TO: " << *Op
<< '\n');
649 ExpressionChangedStart
= Op
;
650 if (!ExpressionChangedEnd
)
651 ExpressionChangedEnd
= Op
;
    // Not the last operation. The left-hand side will be a sub-expression
    // while the right-hand side will be the current element of Ops.
    Value *NewRHS = Ops[i].Op;
    if (NewRHS != Op->getOperand(1)) {
      LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
      if (NewRHS == Op->getOperand(0)) {
        // The new right-hand side was already present as the left operand. If
        // we are lucky then swapping the operands will sort out both of them.
        Op->swapOperands();
      } else {
        // Overwrite with the new right-hand side.
        BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
        if (BO && !NotRewritable.count(BO))
          NodesToRewrite.push_back(BO);
        Op->setOperand(1, NewRHS);
        ExpressionChangedStart = Op;
        if (!ExpressionChangedEnd)
          ExpressionChangedEnd = Op;
      }
      LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
      MadeChange = true;
      ++NumChanged;
    }

    // Now deal with the left-hand side. If this is already an operation node
    // from the original expression then just rewrite the rest of the expression
    // into it.
    BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
    if (BO && !NotRewritable.count(BO)) {
      Op = BO;
      continue;
    }

    // Otherwise, grab a spare node from the original expression and use that as
    // the left-hand side. If there are no nodes left then the optimizers made
    // an expression with more nodes than the original! This usually means that
    // they did something stupid but it might mean that the problem was just too
    // hard (finding the minimal number of multiplications needed to realize a
    // multiplication expression is NP-complete). Whatever the reason, smart or
    // stupid, create a new node if there are none left.
    BinaryOperator *NewOp;
    if (NodesToRewrite.empty()) {
      Constant *Poison = PoisonValue::get(I->getType());
      NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Poison,
                                     Poison, "", I->getIterator());
      if (isa<FPMathOperator>(NewOp))
        NewOp->setFastMathFlags(I->getFastMathFlags());
    } else {
      NewOp = NodesToRewrite.pop_back_val();
    }

    LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
    Op->setOperand(0, NewOp);
    LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
    ExpressionChangedStart = Op;
    if (!ExpressionChangedEnd)
      ExpressionChangedEnd = Op;
    MadeChange = true;
    ++NumChanged;
    Op = NewOp;
  }
  // If the expression changed non-trivially then clear out all subclass data
  // starting from the operator specified in ExpressionChanged, and compactify
  // the operators to just before the expression root to guarantee that the
  // expression tree is dominated by all of Ops.
  if (ExpressionChangedStart) {
    bool ClearFlags = true;
    do {
      // Preserve flags.
      if (ClearFlags) {
        if (isa<FPMathOperator>(I)) {
          FastMathFlags Flags = I->getFastMathFlags();
          ExpressionChangedStart->clearSubclassOptionalData();
          ExpressionChangedStart->setFastMathFlags(Flags);
        } else {
          ExpressionChangedStart->clearSubclassOptionalData();
          if (ExpressionChangedStart->getOpcode() == Instruction::Add ||
              (ExpressionChangedStart->getOpcode() == Instruction::Mul &&
               Flags.AllKnownNonZero)) {
            if (Flags.HasNUW)
              ExpressionChangedStart->setHasNoUnsignedWrap();
            if (Flags.HasNSW && (Flags.AllKnownNonNegative || Flags.HasNUW))
              ExpressionChangedStart->setHasNoSignedWrap();
          }
        }
      }

      if (ExpressionChangedStart == ExpressionChangedEnd)
        ClearFlags = false;
      if (ExpressionChangedStart == I)
        break;

      // Discard any debug info related to the expressions that has changed (we
      // can leave debug info related to the root and any operation that didn't
      // change, since the result of the expression tree should be the same
      // even after reassociation).
      replaceDbgUsesWithUndef(ExpressionChangedStart);

      ExpressionChangedStart->moveBefore(I);
      ExpressionChangedStart =
          cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
    } while (true);
  }

  // Throw away any left over nodes from the original expression.
  for (BinaryOperator *BO : NodesToRewrite)
    RedoInsts.insert(BO);
}
/// Insert instructions before the instruction pointed to by BI,
/// that computes the negative version of the value specified. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
/// Also add intermediate instructions to the redo list that are modified while
/// pushing the negates through adds. These will be revisited to see if
/// additional opportunities have been exposed.
static Value *NegateValue(Value *V, Instruction *BI,
                          ReassociatePass::OrderedSet &ToRedo) {
  if (auto *C = dyn_cast<Constant>(V)) {
    const DataLayout &DL = BI->getDataLayout();
    Constant *Res = C->getType()->isFPOrFPVectorTy()
                        ? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
                        : ConstantExpr::getNeg(C);
    if (Res)
      return Res;
  }

  // We are trying to expose opportunity for reassociation. One of the things
  // that we want to do to achieve this is to push a negation as deep into an
  // expression chain as possible, to expose the add instructions. In practice,
  // this means that we turn this:
  //   X = -(A+12+C+D)   into   X = -A + -12 + -C + -D = -12 + -A + -C + -D
  // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
  // the constants. We assume that instcombine will clean up the mess later if
  // we introduce tons of unnecessary negation instructions.
  //
  if (BinaryOperator *I =
          isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
    // Push the negates through the add.
    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
    if (I->getOpcode() == Instruction::Add) {
      I->setHasNoUnsignedWrap(false);
      I->setHasNoSignedWrap(false);
    }

    // We must move the add instruction here, because the neg instructions do
    // not dominate the old add instruction in general. By moving it, we are
    // assured that the neg instructions we just inserted dominate the
    // instruction we are about to insert after them.
    //
    I->moveBefore(BI);
    I->setName(I->getName()+".neg");

    // Add the intermediate negates to the redo list as processing them later
    // could expose more reassociating opportunities.
    ToRedo.insert(I);
    return I;
  }
821 // Scan the use lists of V to see if we have one already.
822 for (User
*U
: V
->users()) {
823 if (!match(U
, m_Neg(m_Value())) && !match(U
, m_FNeg(m_Value())))
826 // We found one! Now we have to make sure that the definition dominates
827 // this use. We do this by moving it to the entry block (if it is a
828 // non-instruction value) or right after the definition. These negates will
829 // be zapped by reassociate later, so we don't need much finesse here.
830 Instruction
*TheNeg
= dyn_cast
<Instruction
>(U
);
832 // We can't safely propagate a vector zero constant with poison/undef lanes.
834 if (match(TheNeg
, m_BinOp(m_Constant(C
), m_Value())) &&
835 C
->containsUndefOrPoisonElement())
838 // Verify that the negate is in this function, V might be a constant expr.
840 TheNeg
->getParent()->getParent() != BI
->getParent()->getParent())
843 BasicBlock::iterator InsertPt
;
844 if (Instruction
*InstInput
= dyn_cast
<Instruction
>(V
)) {
845 auto InsertPtOpt
= InstInput
->getInsertionPointAfterDef();
848 InsertPt
= *InsertPtOpt
;
850 InsertPt
= TheNeg
->getFunction()
852 .getFirstNonPHIOrDbg()
856 // Check that if TheNeg is moved out of its parent block, we drop its
857 // debug location to avoid extra coverage.
858 // See test dropping_debugloc_the_neg.ll for a detailed example.
859 if (TheNeg
->getParent() != InsertPt
->getParent())
860 TheNeg
->dropLocation();
861 TheNeg
->moveBefore(*InsertPt
->getParent(), InsertPt
);
863 if (TheNeg
->getOpcode() == Instruction::Sub
) {
864 TheNeg
->setHasNoUnsignedWrap(false);
865 TheNeg
->setHasNoSignedWrap(false);
867 TheNeg
->andIRFlags(BI
);
869 ToRedo
.insert(TheNeg
);
873 // Insert a 'neg' instruction that subtracts the value from zero to get the
875 Instruction
*NewNeg
=
876 CreateNeg(V
, V
->getName() + ".neg", BI
->getIterator(), BI
);
877 // NewNeg is generated to potentially replace BI, so use its DebugLoc.
878 NewNeg
->setDebugLoc(BI
->getDebugLoc());
879 ToRedo
.insert(NewNeg
);
// See if this `or` looks like a load widening reduction, i.e. that it
// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
// ensure that the pattern is *really* a load widening reduction,
// we do not ensure that it can really be replaced with a widened load,
// only that it mostly looks like one.
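//
// One shape it accepts (editor's sketch), an i16 assembled from two i8 loads:
//   %a.ext = zext i8 %a to i16     ; %a, %b loaded from adjacent bytes
//   %b.ext = zext i8 %b to i16
//   %b.shl = shl i16 %b.ext, 8
//   %or    = or i16 %b.shl, %a.ext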
static bool isLoadCombineCandidate(Instruction *Or) {
  SmallVector<Instruction *, 8> Worklist;
  SmallSet<Instruction *, 8> Visited;

  auto Enqueue = [&](Value *V) {
    auto *I = dyn_cast<Instruction>(V);
    // Each node of an `or` reduction must be an instruction.
    if (!I)
      return false; // Node is certainly not part of an `or` load reduction.
    // Only process instructions we have never processed before.
    if (Visited.insert(I).second)
      Worklist.emplace_back(I);
    return true; // Will need to look at parent nodes.
  };

  if (!Enqueue(Or))
    return false; // Not an `or` reduction pattern.

  while (!Worklist.empty()) {
    auto *I = Worklist.pop_back_val();

    // Okay, which instruction is this node?
    switch (I->getOpcode()) {
    case Instruction::Or:
      // Got an `or` node. That's fine, just recurse into its operands.
      for (Value *Op : I->operands())
        if (!Enqueue(Op))
          return false; // Not an `or` reduction pattern.
      continue;

    case Instruction::Shl:
    case Instruction::ZExt:
      // `shl`/`zext` nodes are fine, just recurse into their base operand.
      if (!Enqueue(I->getOperand(0)))
        return false; // Not an `or` reduction pattern.
      continue;

    case Instruction::Load:
      // Perfect, `load` node means we've reached an edge of the graph.
      continue;

    default: // Unknown node.
      return false; // Not an `or` reduction pattern.
    }
  }

  return true;
}
/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
  // Don't bother to convert this up unless either the LHS is an associable add
  // or subtract or mul or if this is only used by one of the above.
  // This is only a compile-time improvement, it is not needed for correctness!
  auto isInteresting = [](Value *V) {
    for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
                    Instruction::Shl})
      if (isReassociableOp(V, Op))
        return true;
    return false;
  };

  if (any_of(Or->operands(), isInteresting))
    return true;

  Value *VB = Or->user_back();
  if (Or->hasOneUse() && isInteresting(VB))
    return true;

  return false;
}
/// If we have (X|Y), and iff X and Y have no common bits set,
/// transform this into (X+Y) to allow arithmetic reassociation.
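///
/// For example (editor's sketch): in "(x & 0xF0) | (y & 0x0F)" no bit can be
/// set in both operands, so the `or` is equivalent to an `add` (no carries),
/// and as an add it can participate in the usual reassociation.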
static BinaryOperator *convertOrWithNoCommonBitsToAdd(Instruction *Or) {
  // Convert an or into an add.
  BinaryOperator *New = CreateAdd(Or->getOperand(0), Or->getOperand(1), "",
                                  Or->getIterator(), Or);
  New->setHasNoSignedWrap();
  New->setHasNoUnsignedWrap();
  New->takeName(Or);

  // Everyone now refers to the add instruction.
  Or->replaceAllUsesWith(New);
  New->setDebugLoc(Or->getDebugLoc());

  LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n');
  return New;
}
/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
  // If this is a negation, we can't split it up!
  if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
    return false;

  // Don't breakup X - undef.
  if (isa<UndefValue>(Sub->getOperand(1)))
    return false;

  // Don't bother to break this up unless either the LHS is an associable add or
  // subtract or if this is only used by one.
  Value *V0 = Sub->getOperand(0);
  if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
      isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
    return true;
  Value *V1 = Sub->getOperand(1);
  if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
      isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
    return true;
  Value *VB = Sub->user_back();
  if (Sub->hasOneUse() &&
      (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
       isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
    return true;

  return false;
}
/// If we have (X-Y), and if either X is an add, or if this is only used by an
/// add, transform this into (X+(0-Y)) to promote better reassociation.
static BinaryOperator *BreakUpSubtract(Instruction *Sub,
                                       ReassociatePass::OrderedSet &ToRedo) {
  // Convert a subtract into an add and a neg instruction. This allows sub
  // instructions to be commuted with other add instructions.
  //
  // Calculate the negative value of Operand 1 of the sub instruction,
  // and set it as the RHS of the add instruction we just made.
  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
  BinaryOperator *New =
      CreateAdd(Sub->getOperand(0), NegVal, "", Sub->getIterator(), Sub);
  Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
  Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
  New->takeName(Sub);

  // Everyone now refers to the add instruction.
  Sub->replaceAllUsesWith(New);
  New->setDebugLoc(Sub->getDebugLoc());

  LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n');
  return New;
}
/// If this is a shift of a reassociable multiply or is used by one, change
/// this into a multiply by a constant to assist with further reassociation.
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
  Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
  auto *SA = cast<ConstantInt>(Shl->getOperand(1));
  MulCst = ConstantFoldBinaryInstruction(Instruction::Shl, MulCst, SA);
  assert(MulCst && "Constant folding of immediate constants failed");

  BinaryOperator *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
                                                  "", Shl->getIterator());
  Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
  Mul->takeName(Shl);

  // Everyone now refers to the mul instruction.
  Shl->replaceAllUsesWith(Mul);
  Mul->setDebugLoc(Shl->getDebugLoc());

  // We can safely preserve the nuw flag in all cases. It's also safe to turn a
  // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
  // handling. It can be preserved as long as we're not left shifting by
  // bitwidth - 1.
  bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
  bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
  unsigned BitWidth = Shl->getType()->getIntegerBitWidth();
  if (NSW && (NUW || SA->getValue().ult(BitWidth - 1)))
    Mul->setHasNoSignedWrap(true);
  Mul->setHasNoUnsignedWrap(NUW);
  return Mul;
}
/// Scan backwards and forwards among values with the same rank as element i
/// to see if X exists. If X does not exist, return i. This is useful when
/// scanning for 'x' when we see '-x' because they both get the same rank.
static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
                                  unsigned i, Value *X) {
  unsigned XRank = Ops[i].Rank;
  unsigned e = Ops.size();
  for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
    if (Ops[j].Op == X)
      return j;
    if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
      if (Instruction *I2 = dyn_cast<Instruction>(X))
        if (I1->isIdenticalTo(I2))
          return j;
  }
  // Scan backwards.
  for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
    if (Ops[j].Op == X)
      return j;
    if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
      if (Instruction *I2 = dyn_cast<Instruction>(X))
        if (I1->isIdenticalTo(I2))
          return j;
  }
  return i;
}
/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
static Value *EmitAddTreeOfValues(BasicBlock::iterator It,
                                  SmallVectorImpl<WeakTrackingVH> &Ops) {
  if (Ops.size() == 1) return Ops.back();

  Value *V1 = Ops.pop_back_val();
  Value *V2 = EmitAddTreeOfValues(It, Ops);
  return CreateAdd(V2, V1, "reass.add", It, &*It);
}
/// If V is an expression tree that is a multiplication sequence,
/// and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
  if (!BO)
    return nullptr;

  SmallVector<RepeatedValue, 8> Tree;
  OverflowTracking Flags;
  MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags);
  SmallVector<ValueEntry, 8> Factors;
  Factors.reserve(Tree.size());
  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
    RepeatedValue E = Tree[i];
    Factors.append(E.second, ValueEntry(getRank(E.first), E.first));
  }

  bool FoundFactor = false;
  bool NeedsNegate = false;
  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
    if (Factors[i].Op == Factor) {
      FoundFactor = true;
      Factors.erase(Factors.begin()+i);
      break;
    }

    // If this is a negative version of this factor, remove it.
    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
      if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
        if (FC1->getValue() == -FC2->getValue()) {
          FoundFactor = NeedsNegate = true;
          Factors.erase(Factors.begin()+i);
          break;
        }
    } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
      if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
        const APFloat &F1 = FC1->getValueAPF();
        APFloat F2(FC2->getValueAPF());
        F2.changeSign();
        if (F1 == F2) {
          FoundFactor = NeedsNegate = true;
          Factors.erase(Factors.begin() + i);
          break;
        }
      }
    }
  }

  if (!FoundFactor) {
    // Make sure to restore the operands to the expression tree.
    RewriteExprTree(BO, Factors, Flags);
    return nullptr;
  }

  BasicBlock::iterator InsertPt = ++BO->getIterator();

  // If this was just a single multiply, remove the multiply and return the only
  // remaining operand.
  if (Factors.size() == 1) {
    RedoInsts.insert(BO);
    V = Factors[0].Op;
  } else {
    RewriteExprTree(BO, Factors, Flags);
    V = BO;
  }

  if (NeedsNegate)
    V = CreateNeg(V, "neg", InsertPt, BO);

  return V;
}
/// If V is a single-use multiply, recursively add its operands as factors,
/// otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
                                         SmallVectorImpl<Value *> &Factors) {
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
  if (!BO) {
    Factors.push_back(V);
    return;
  }

  // Otherwise, add the LHS and RHS to the list of factors.
  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
}
/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
/// This optimizes based on identities. If it can be reduced to a single Value,
/// it is returned, otherwise the Ops list is mutated as necessary.
static Value *OptimizeAndOrXor(unsigned Opcode,
                               SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    // First, check for X and ~X in the operand list.
    assert(i < Ops.size());
    Value *X;
    if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
      unsigned FoundX = FindInOperandList(Ops, i, X);
      if (FoundX != i) {
        if (Opcode == Instruction::And) // ...&X&~X = 0
          return Constant::getNullValue(X->getType());

        if (Opcode == Instruction::Or) // ...|X|~X = -1
          return Constant::getAllOnesValue(X->getType());
      }
    }

    // Next, check for duplicate pairs of values, which we assume are next to
    // each other, due to our sorting criteria.
    assert(i < Ops.size());
    if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
      if (Opcode == Instruction::And || Opcode == Instruction::Or) {
        // Drop duplicate values for And and Or.
        Ops.erase(Ops.begin()+i);
        --i; --e;
        ++NumAnnihil;
        continue;
      }

      // Drop pairs of values for Xor.
      assert(Opcode == Instruction::Xor);
      if (e == 2)
        return Constant::getNullValue(Ops[0].Op->getType());

      // Y ^ X^X -> Y
      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
      e -= 2; --i;
      ++NumAnnihil;
    }
  }
  return nullptr;
}
/// Helper function of CombineXorOpnd(). It creates a bitwise-and
/// instruction with the given two operands, and return the resulting
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
/// be returned.
static Value *createAndInstr(BasicBlock::iterator InsertBefore, Value *Opnd,
                             const APInt &ConstOpnd) {
  if (ConstOpnd.isZero())
    return nullptr;

  if (ConstOpnd.isAllOnes())
    return Opnd;

  Instruction *I = BinaryOperator::CreateAnd(
      Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
      InsertBefore);
  I->setDebugLoc(InsertBefore->getDebugLoc());
  return I;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
// into "R ^ C", where C would be 0, and R is a symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" is returned
// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
// and both "Res" and "ConstOpnd" remain unchanged.
bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
                                     APInt &ConstOpnd, Value *&Res) {
  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
  //                           = ((x | c1) ^ c1) ^ (c1 ^ c2)
  //                           = (x & ~c1) ^ (c1 ^ c2)
  // It is useful only when c1 == c2.
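  // E.g. (editor's illustration) with c1 == c2 == 3: "(x | 3) ^ 3" becomes
  // "x & ~3", and the constant part of the xor drops to 0.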
  if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
    return false;

  if (!Opnd1->getValue()->hasOneUse())
    return false;

  const APInt &C1 = Opnd1->getConstPart();
  if (C1 != ConstOpnd)
    return false;

  Value *X = Opnd1->getSymbolicPart();
  Res = createAndInstr(It, X, ~C1);
  // ConstOpnd was C2, now C1 ^ C2.
  ConstOpnd ^= C1;

  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
    RedoInsts.insert(T);
  return true;
}
// Helper function of OptimizeXor(). It tries to simplify
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
// symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" is returned
// via "Res" and "ConstOpnd", respectively (If the entire expression is
// evaluated to a constant, the Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
                                     XorOpnd *Opnd2, APInt &ConstOpnd,
                                     Value *&Res) {
  Value *X = Opnd1->getSymbolicPart();
  if (X != Opnd2->getSymbolicPart())
    return false;

  // This many instructions become dead. (At least "Opnd1 ^ Opnd2" will die.)
  int DeadInstNum = 1;
  if (Opnd1->getValue()->hasOneUse())
    DeadInstNum++;
  if (Opnd2->getValue()->hasOneUse())
    DeadInstNum++;

  // Xor-Rule 2:
  //  (x | c1) ^ (x & c2)
  //   = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
  //   = (x & ~c1) ^ (x & c2) ^ c1               // Xor-Rule 1
  //   = (x & c3) ^ c1, where c3 = ~c1 ^ c2      // Xor-rule 3
  //
  if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
    if (Opnd2->isOrExpr())
      std::swap(Opnd1, Opnd2);

    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3((~C1) ^ C2);

    // Do not increase code size!
    if (!C3.isZero() && !C3.isAllOnes()) {
      int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
      if (NewInstNum > DeadInstNum)
        return false;
    }

    Res = createAndInstr(It, X, C3);
    ConstOpnd ^= C1;
  } else if (Opnd1->isOrExpr()) {
    // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
    //
    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3 = C1 ^ C2;

    // Do not increase code size
    if (!C3.isZero() && !C3.isAllOnes()) {
      int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
      if (NewInstNum > DeadInstNum)
        return false;
    }

    Res = createAndInstr(It, X, C3);
    ConstOpnd ^= C3;
  } else {
    // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
    //
    const APInt &C1 = Opnd1->getConstPart();
    const APInt &C2 = Opnd2->getConstPart();
    APInt C3 = C1 ^ C2;
    Res = createAndInstr(It, X, C3);
  }

  // Put the original operands in the Redo list; hope they will be deleted
  // as dead code.
  if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
    RedoInsts.insert(T);
  if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
    RedoInsts.insert(T);

  return true;
}
/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
/// to a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
Value *ReassociatePass::OptimizeXor(Instruction *I,
                                    SmallVectorImpl<ValueEntry> &Ops) {
  if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
    return V;

  if (Ops.size() == 1)
    return nullptr;

  SmallVector<XorOpnd, 8> Opnds;
  SmallVector<XorOpnd *, 8> OpndPtrs;
  Type *Ty = Ops[0].Op->getType();
  APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);

  // Step 1: Convert ValueEntry to XorOpnd
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    Value *V = Ops[i].Op;
    const APInt *C;
    // TODO: Support non-splat vectors.
    if (match(V, m_APInt(C))) {
      ConstOpnd ^= *C;
    } else {
      XorOpnd O(V);
      O.setSymbolicRank(getRank(O.getSymbolicPart()));
      Opnds.push_back(O);
    }
  }

  // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
  // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
  // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
  // with the previous loop --- the iterator of the "Opnds" may be invalidated
  // when new elements are added to the vector.
  for (XorOpnd &Op : Opnds)
    OpndPtrs.push_back(&Op);

  // Step 2: Sort the Xor-Operands in a way such that the operands containing
  // the same symbolic value cluster together. For instance, the input operand
  // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
  // ("x | 123", "x & 789", "y & 456").
  //
  // The purpose is twofold:
  // 1) Cluster together the operands sharing the same symbolic-value.
  // 2) Operand having smaller symbolic-value-rank is permuted earlier, which
  //    could potentially shorten the critical path, and expose more
  //    loop-invariants. Note that values' rank are basically defined in RPO
  //    order (FIXME). So, if Rank(X) < Rank(Y) < Rank(Z), it means X is
  //    defined earlier than Y which is defined earlier than Z. Permuting
  //    "x | 1", "y & 2", "z" in the order of X-Y-Z is better than any other
  //    order.
  llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
    return LHS->getSymbolicRank() < RHS->getSymbolicRank();
  });

  // Step 3: Combine adjacent operands
  XorOpnd *PrevOpnd = nullptr;
  bool Changed = false;
  for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
    XorOpnd *CurrOpnd = OpndPtrs[i];
    // The combined value
    Value *CV;

    // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
    if (!ConstOpnd.isZero() &&
        CombineXorOpnd(I->getIterator(), CurrOpnd, ConstOpnd, CV)) {
      Changed = true;
      if (CV)
        *CurrOpnd = XorOpnd(CV);
      else {
        CurrOpnd->Invalidate();
        continue;
      }
    }

    if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
      PrevOpnd = CurrOpnd;
      continue;
    }

    // step 3.2: When previous and current operands share the same symbolic
    // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
    if (CombineXorOpnd(I->getIterator(), CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
      // Remove previous operand
      PrevOpnd->Invalidate();
      if (CV) {
        *CurrOpnd = XorOpnd(CV);
        PrevOpnd = CurrOpnd;
      } else {
        CurrOpnd->Invalidate();
        PrevOpnd = nullptr;
      }
      Changed = true;
    }
  }

  // Step 4: Reassemble the Ops
  if (Changed) {
    Ops.clear();
    for (const XorOpnd &O : Opnds) {
      if (O.isInvalid())
        continue;
      ValueEntry VE(getRank(O.getValue()), O.getValue());
      Ops.push_back(VE);
    }
    if (!ConstOpnd.isZero()) {
      Value *C = ConstantInt::get(Ty, ConstOpnd);
      ValueEntry VE(getRank(C), C);
      Ops.push_back(VE);
    }
    unsigned Sz = Ops.size();
    if (Sz == 1)
      return Ops.back().Op;
    if (Sz == 0) {
      assert(ConstOpnd.isZero());
      return ConstantInt::get(Ty, ConstOpnd);
    }
  }
  return nullptr;
}
/// Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
Value *ReassociatePass::OptimizeAdd(Instruction *I,
                                    SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and -X pairs. If we find any, we
  // can simplify expressions like X+-X == 0 and X+~X == -1. While we're at it,
  // scan for any duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.

  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    Value *TheOp = Ops[i].Op;
    // Check to see if we've seen this operand before. If so, we factor all
    // instances of the operand together. Due to our sorting criteria, we know
    // that these need to be next to each other in the vector.
    if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
      // Rescan the list, remove all instances of this operand from the expr.
      unsigned NumFound = 0;
      do {
        Ops.erase(Ops.begin()+i);
        ++NumFound;
      } while (i != Ops.size() && Ops[i].Op == TheOp);

      LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
                        << '\n');
      ++NumFactor;

      // Insert a new multiply.
      Type *Ty = TheOp->getType();
      Constant *C = Ty->isIntOrIntVectorTy() ?
        ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
      Instruction *Mul = CreateMul(TheOp, C, "factor", I->getIterator(), I);

      // Now that we have inserted a multiply, optimize it. This allows us to
      // handle cases that require multiple factoring steps, such as this:
      // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
      RedoInsts.insert(Mul);

      // If every add operand was a duplicate, return the multiply.
      if (Ops.empty())
        return Mul;

      // Otherwise, we had some input that didn't have the dupe, such as
      // "A + A + B" -> "A*2 + B". Add the new multiply to the list of
      // things being added by this operation.
      Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));

      --i;
      e = Ops.size();
      continue;
    }

    // Check for X and -X or X and ~X in the operand list.
    Value *X;
    if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
        !match(TheOp, m_FNeg(m_Value(X))))
      continue;

    unsigned FoundX = FindInOperandList(Ops, i, X);
    if (FoundX == i)
      continue;

    // Remove X and -X from the operand list.
    if (Ops.size() == 2 &&
        (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
      return Constant::getNullValue(X->getType());

    // Remove X and ~X from the operand list.
    if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
      return Constant::getAllOnesValue(X->getType());

    Ops.erase(Ops.begin()+i);
    if (i < FoundX)
      --FoundX;
    else
      --i; // Need to back up an extra one.
    Ops.erase(Ops.begin()+FoundX);
    ++NumAnnihil;
    --i;    // Revisit element.
    e -= 2; // Removed two elements.

    // If X and ~X we append -1 to the operand list.
    if (match(TheOp, m_Not(m_Value()))) {
      Value *V = Constant::getAllOnesValue(X->getType());
      Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
      e += 1;
    }
  }
1580 // Scan the operand list, checking to see if there are any common factors
1581 // between operands. Consider something like A*A+A*B*C+D. We would like to
1582 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
1583 // To efficiently find this, we count the number of times a factor occurs
1584 // for any ADD operands that are MULs.
1585 DenseMap
<Value
*, unsigned> FactorOccurrences
;
1587 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1588 // where they are actually the same multiply.
1589 unsigned MaxOcc
= 0;
1590 Value
*MaxOccVal
= nullptr;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    BinaryOperator *BOp =
        isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
    if (!BOp)
      continue;

    // Compute all of the factors of this added value.
    SmallVector<Value*, 8> Factors;
    FindSingleUseMultiplyFactors(BOp, Factors);
    assert(Factors.size() > 1 && "Bad linearize!");

    // Add one to FactorOccurrences for each unique factor in this op.
    SmallPtrSet<Value*, 8> Duplicates;
    for (Value *Factor : Factors) {
      if (!Duplicates.insert(Factor).second)
        continue;

      unsigned Occ = ++FactorOccurrences[Factor];
      if (Occ > MaxOcc) {
        MaxOcc = Occ;
        MaxOccVal = Factor;
      }

      // If Factor is a negative constant, add the negated value as a factor
      // because we can percolate the negate out.  Watch for minint, which
      // cannot be positivified.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
        if (CI->isNegative() && !CI->isMinValue(true)) {
          Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
          if (!Duplicates.insert(Factor).second)
            continue;
          unsigned Occ = ++FactorOccurrences[Factor];
          if (Occ > MaxOcc) {
            MaxOcc = Occ;
            MaxOccVal = Factor;
          }
        }
      } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
        if (CF->isNegative()) {
          APFloat F(CF->getValueAPF());
          F.changeSign();
          Factor = ConstantFP::get(CF->getContext(), F);
          if (!Duplicates.insert(Factor).second)
            continue;
          unsigned Occ = ++FactorOccurrences[Factor];
          if (Occ > MaxOcc) {
            MaxOcc = Occ;
            MaxOccVal = Factor;
          }
        }
      }
    }
  }
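
  // For A*A + A*B*C + D, each mul operand contributes its unique factors
  // once, so A is counted twice overall and becomes MaxOccVal with MaxOcc 2.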

  // If any factor occurred more than one time, we can pull it out.
  if (MaxOcc > 1) {
    LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
                      << '\n');
    ++NumFactor;

    // Create a new instruction that uses the MaxOccVal twice.  If we don't do
    // this, we could otherwise run into situations where removing a factor
    // from an expression will drop a use of maxocc, and this can cause
    // RemoveFactorFromExpression on successive values to behave differently.
    Instruction *DummyInst =
        I->getType()->isIntOrIntVectorTy()
            ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
            : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);

    SmallVector<WeakTrackingVH, 4> NewMulOps;
    for (unsigned i = 0; i != Ops.size(); ++i) {
      // Only try to remove factors from expressions we're allowed to.
      BinaryOperator *BOp =
          isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
      if (!BOp)
        continue;

      if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
        // The factorized operand may occur several times.  Convert them all in
        // one fell swoop.
        for (unsigned j = Ops.size(); j != i;) {
          --j;
          if (Ops[j].Op == Ops[i].Op) {
            NewMulOps.push_back(V);
            Ops.erase(Ops.begin()+j);
          }
        }
        --i;
      }
    }

    // No need for extra uses anymore.
    DummyInst->deleteValue();

    unsigned NumAddedValues = NewMulOps.size();
    Value *V = EmitAddTreeOfValues(I->getIterator(), NewMulOps);

    // Now that we have inserted the add tree, optimize it. This allows us to
    // handle cases that require multiple factoring steps, such as this:
    // A*A*B + A*A*C  -->  A*(A*B + A*C)  -->  A*(A*(B+C))
    assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
    (void)NumAddedValues;
    if (Instruction *VI = dyn_cast<Instruction>(V))
      RedoInsts.insert(VI);

    // Create the multiply.
    Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I->getIterator(), I);

    // Rerun associate on the multiply in case the inner expression turned into
    // a multiply.  We want to make sure that we keep things in canonical form.
    RedoInsts.insert(V2);
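
    // At this point "A*B + A*C" has become "A*(B+C)": NewMulOps held B and C,
    // V is their add tree, and V2 multiplies it back by the common factor A.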

    // If every add operand included the factor (e.g. "A*B + A*C"), then the
    // entire result expression is just the multiply "A*(B+C)".
    if (Ops.empty())
      return V2;

    // Otherwise, we had some input that didn't have the factor, such as
    // "A*B + A*C + D" -> "A*(B+C) + D".  Add the new multiply to the list of
    // things being added by this operation.
    Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
  }

  return nullptr;
}

/// Build up a vector of value/power pairs factoring a product.
///
/// Given a series of multiplication operands, build a vector of factors and
/// the powers each is raised to when forming the final product. Sort them in
/// the order of descending power.
///
///      (x*x)          -> [(x, 2)]
///     ((x*x)*x)       -> [(x, 3)]
///   ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
                                   SmallVectorImpl<Factor> &Factors) {
  // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
  // Compute the sum of powers of simplifiable factors.
  unsigned FactorPowerSum = 0;
  for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
    Value *Op = Ops[Idx-1].Op;

    // Count the number of occurrences of this value.
    unsigned Count = 1;
    for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
      ++Count;
    // Track for simplification all factors which occur 2 or more times.
    if (Count > 1)
      FactorPowerSum += Count;
  }

  // We can only simplify factors if the sum of the powers of our simplifiable
  // factors is 4 or higher. When that is the case, we will *always* have
  // a simplification. This is an important invariant to prevent cyclically
  // trying to simplify already minimal formations.
  if (FactorPowerSum < 4)
    return false;
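
  // For example, x*x*y*y (power sum 4) can become (x*y)*(x*y), saving one
  // multiply, whereas x*x*y (power sum 2) is already minimal.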

  // Now gather the simplifiable factors, removing them from Ops.
  FactorPowerSum = 0;
  for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
    Value *Op = Ops[Idx-1].Op;

    // Count the number of occurrences of this value.
    unsigned Count = 1;
    for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
      ++Count;
    if (Count == 1)
      continue;
    // Move an even number of occurrences to Factors.
    Count &= ~1U;
    Idx -= Count;
    FactorPowerSum += Count;
    Factors.push_back(Factor(Op, Count));
    Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
  }

  // None of the adjustments above should have reduced the sum of factor powers
  // below our minimum of '4'.
  assert(FactorPowerSum >= 4);

  llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
    return LHS.Power > RHS.Power;
  });
  return true;
}

/// Build a tree of multiplies, computing the product of Ops.
static Value *buildMultiplyTree(IRBuilderBase &Builder,
                                SmallVectorImpl<Value*> &Ops) {
  if (Ops.size() == 1)
    return Ops.back();

  Value *LHS = Ops.pop_back_val();
  do {
    if (LHS->getType()->isIntOrIntVectorTy())
      LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
    else
      LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
  } while (!Ops.empty());

  return LHS;
}

/// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
///
/// Given a vector of values raised to various powers, where no two values are
/// equal and the powers are sorted in decreasing order, compute the minimal
/// DAG of multiplies to compute the final product, and return that product
/// value.
Value *
ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
                                         SmallVectorImpl<Factor> &Factors) {
  assert(Factors[0].Power);
  SmallVector<Value *, 4> OuterProduct;
  for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
       Idx < Size && Factors[Idx].Power > 0; ++Idx) {
    if (Factors[Idx].Power != Factors[LastIdx].Power) {
      LastIdx = Idx;
      continue;
    }

    // We want to multiply across all the factors with the same power so that
    // we can raise them to that power as a single entity. Build a mini tree
    // for that.
    SmallVector<Value *, 4> InnerProduct;
    InnerProduct.push_back(Factors[LastIdx].Base);
    do {
      InnerProduct.push_back(Factors[Idx].Base);
      ++Idx;
    } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);

    // Reset the base value of the first factor to the new expression tree.
    // We'll remove all the factors with the same power in a second pass.
    Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
    if (Instruction *MI = dyn_cast<Instruction>(M))
      RedoInsts.insert(MI);

    LastIdx = Idx;
  }
  // Unique factors with equal powers -- we've folded them into the first one's
  // base.
  Factors.erase(llvm::unique(Factors,
                             [](const Factor &LHS, const Factor &RHS) {
                               return LHS.Power == RHS.Power;
                             }),
                Factors.end());

  // Iteratively collect the base of each factor with an odd power into the
  // outer product, and halve each power in preparation for squaring the
  // expression.
  for (Factor &F : Factors) {
    if (F.Power & 1)
      OuterProduct.push_back(F.Base);
    F.Power >>= 1;
  }
  if (Factors[0].Power) {
    Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
    OuterProduct.push_back(SquareRoot);
    OuterProduct.push_back(SquareRoot);
  }
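
  // For example, a^3*b^2 yields OuterProduct = [a, a*b, a*b]: 'a' is the
  // odd-power leftover and a*b is the recursively built square root.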
  if (OuterProduct.size() == 1)
    return OuterProduct.front();

  Value *V = buildMultiplyTree(Builder, OuterProduct);
  return V;
}

Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
                                    SmallVectorImpl<ValueEntry> &Ops) {
  // We can only optimize the multiplies when there is a chain of more than
  // three, such that a balanced tree might require fewer total multiplies.
  if (Ops.size() < 4)
    return nullptr;

  // Try to turn linear trees of multiplies without other uses of the
  // intermediate stages into minimal multiply DAGs with perfect sub-expression
  // sharing.
  SmallVector<Factor, 4> Factors;
  if (!collectMultiplyFactors(Ops, Factors))
    return nullptr; // All distinct factors, so nothing left for us to do.

  IRBuilder<> Builder(I);
  // The reassociate transformation for FP operations is performed only
  // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
  // to the newly generated operations.
  if (auto FPI = dyn_cast<FPMathOperator>(I))
    Builder.setFastMathFlags(FPI->getFastMathFlags());

  Value *V = buildMinimalMultiplyDAG(Builder, Factors);
  if (Ops.empty())
    return V;

  ValueEntry NewEntry = ValueEntry(getRank(V), V);
  Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
  return nullptr;
}

Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
                                           SmallVectorImpl<ValueEntry> &Ops) {
  // Now that we have the linearized expression tree, try to optimize it.
  // Start by folding any constants that we found.
  const DataLayout &DL = I->getDataLayout();
  Constant *Cst = nullptr;
  unsigned Opcode = I->getOpcode();
  while (!Ops.empty()) {
    if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
      if (!Cst) {
        Ops.pop_back();
        Cst = C;
        continue;
      }
      if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
        Ops.pop_back();
        Cst = Res;
        continue;
      }
    }
    break;
  }
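
  // For example, when reassociating "(x + 4) + 5" the constants sort to the
  // back of Ops; the loop above pops them both and folds Cst to 9, and the
  // combined constant is pushed back below unless it is the identity.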

  // If there was nothing but constants then we are done.
  if (Ops.empty())
    return Cst;

  // Put the combined constant back at the end of the operand list, except if
  // there is no point. For example, an add of 0 gets dropped here, while a
  // multiplication by zero turns the whole expression into zero.
  if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
    if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
      return Cst;
    Ops.push_back(ValueEntry(0, Cst));
  }

  if (Ops.size() == 1) return Ops[0].Op;

  // Handle destructive annihilation due to identities between elements in the
  // argument list here.
  unsigned NumOps = Ops.size();
  switch (Opcode) {
  default: break;
  case Instruction::And:
  case Instruction::Or:
    if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
      return Result;
    break;

  case Instruction::Xor:
    if (Value *Result = OptimizeXor(I, Ops))
      return Result;
    break;

  case Instruction::Add:
  case Instruction::FAdd:
    if (Value *Result = OptimizeAdd(I, Ops))
      return Result;
    break;

  case Instruction::Mul:
  case Instruction::FMul:
    if (Value *Result = OptimizeMul(I, Ops))
      return Result;
    break;
  }

  if (Ops.size() != NumOps)
    return OptimizeExpression(I, Ops);
  return nullptr;
}

// Remove dead instructions and if any operands are trivially dead add them to
// Insts so they will be removed as well.
void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
                                                OrderedSet &Insts) {
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
  SmallVector<Value *, 4> Ops(I->operands());
  ValueRankMap.erase(I);
  Insts.remove(I);
  RedoInsts.remove(I);
  llvm::salvageDebugInfo(*I);
  I->eraseFromParent();
  for (auto *Op : Ops)
    if (Instruction *OpInst = dyn_cast<Instruction>(Op))
      if (OpInst->use_empty())
        Insts.insert(OpInst);
}

/// Zap the given instruction, adding interesting operands to the work list.
void ReassociatePass::EraseInst(Instruction *I) {
  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
  LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());

  SmallVector<Value *, 8> Ops(I->operands());
  // Erase the dead instruction.
  ValueRankMap.erase(I);
  RedoInsts.remove(I);
  llvm::salvageDebugInfo(*I);
  I->eraseFromParent();
  // Optimize its operands.
  SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
  for (Value *V : Ops)
    if (Instruction *Op = dyn_cast<Instruction>(V)) {
      // If this is a node in an expression tree, climb to the expression root
      // and add that since that's where optimization actually happens.
      unsigned Opcode = Op->getOpcode();
      while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
             Visited.insert(Op).second)
        Op = Op->user_back();

      // The instruction we're going to push may be coming from a
      // dead block, and Reassociate skips the processing of unreachable
      // blocks because it's a waste of time and also because it can
      // lead to infinite loop due to LLVM's non-standard definition
      // of dominance.
      if (ValueRankMap.contains(Op))
        RedoInsts.insert(Op);
    }

  MadeChange = true;
}

/// Recursively analyze an expression to build a list of instructions that have
/// negative floating-point constant operands. The caller can then transform
/// the list to create positive constants for better reassociation and CSE.
static void getNegatibleInsts(Value *V,
                              SmallVectorImpl<Instruction *> &Candidates) {
  // Handle only one-use instructions. Combining negations does not justify
  // replicating instructions.
  Instruction *I;
  if (!match(V, m_OneUse(m_Instruction(I))))
    return;

  // Handle expressions of multiplications and divisions.
  // TODO: This could look through floating-point casts.
  const APFloat *C;
  switch (I->getOpcode()) {
    case Instruction::FMul:
      // Not expecting non-canonical code here. Bail out and wait.
      if (match(I->getOperand(0), m_Constant()))
        break;

      if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) {
        Candidates.push_back(I);
        LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n');
      }
      getNegatibleInsts(I->getOperand(0), Candidates);
      getNegatibleInsts(I->getOperand(1), Candidates);
      break;
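
      // For example, a one-use "fmul float %x, -2.0" is recorded as a
      // candidate here before both of its operands are visited recursively.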
    case Instruction::FDiv:
      // Not expecting non-canonical code here. Bail out and wait.
      if (match(I->getOperand(0), m_Constant()) &&
          match(I->getOperand(1), m_Constant()))
        break;

      if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) ||
          (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) {
        Candidates.push_back(I);
        LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n');
      }
      getNegatibleInsts(I->getOperand(0), Candidates);
      getNegatibleInsts(I->getOperand(1), Candidates);
      break;
    default:
      break;
  }
}

/// Given an fadd/fsub with an operand that is a one-use instruction
/// (the fadd/fsub), try to change negative floating-point constants into
/// positive constants to increase potential for reassociation and CSE.
Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I,
                                                              Instruction *Op,
                                                              Value *OtherOp) {
  assert((I->getOpcode() == Instruction::FAdd ||
          I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");

  // Collect instructions with negative FP constants from the subtree that ends
  // in Op.
  SmallVector<Instruction *, 4> Candidates;
  getNegatibleInsts(Op, Candidates);
  if (Candidates.empty())
    return nullptr;

  // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
  // resulting subtract will be broken up later.  This can get us into an
  // infinite loop during reassociation.
  bool IsFSub = I->getOpcode() == Instruction::FSub;
  bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1;
  if (NeedsSubtract && ShouldBreakUpSubtract(I))
    return nullptr;
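
  // For example, "fadd float %x, (fmul float %y, -2.0)" has one candidate
  // (an odd count), so the constant is flipped to 2.0 below and the fadd is
  // replaced with "fsub float %x, (fmul float %y, 2.0)".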

  for (Instruction *Negatible : Candidates) {
    const APFloat *C;
    if (match(Negatible->getOperand(0), m_APFloat(C))) {
      assert(!match(Negatible->getOperand(1), m_Constant()) &&
             "Expecting only 1 constant operand");
      assert(C->isNegative() && "Expected negative FP constant");
      Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C)));
      MadeChange = true;
    }
    if (match(Negatible->getOperand(1), m_APFloat(C))) {
      assert(!match(Negatible->getOperand(0), m_Constant()) &&
             "Expecting only 1 constant operand");
      assert(C->isNegative() && "Expected negative FP constant");
      Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C)));
      MadeChange = true;
    }
  }
  assert(MadeChange == true && "Negative constant candidate was not changed");

  // Negations cancelled out.
  if (Candidates.size() % 2 == 0)
    return I;

  // Negate the final operand in the expression by flipping the opcode of this
  // fadd/fsub.
  assert(Candidates.size() % 2 == 1 && "Expected odd number");
  IRBuilder<> Builder(I);
  Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I)
                          : Builder.CreateFSubFMF(OtherOp, Op, I);
  I->replaceAllUsesWith(NewInst);
  RedoInsts.insert(I);
  return dyn_cast<Instruction>(NewInst);
}

/// Canonicalize expressions that contain a negative floating-point constant
/// of the following form:
///   OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
///   (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
///   OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
///
/// The fadd/fsub opcode may be switched to allow folding a negation into the
/// input instruction.
Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) {
  LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n');
  Value *X;
  Instruction *Op;
  if (match(I, m_FAdd(m_Value(X), m_OneUse(m_Instruction(Op)))))
    if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
      I = R;
  if (match(I, m_FAdd(m_OneUse(m_Instruction(Op)), m_Value(X))))
    if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
      I = R;
  if (match(I, m_FSub(m_Value(X), m_OneUse(m_Instruction(Op)))))
    if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X))
      I = R;
  return I;
}

/// Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
void ReassociatePass::OptimizeInst(Instruction *I) {
  // Only consider operations that we understand.
  if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
    return;

  if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
    // If an operand of this shift is a reassociable multiply, or if the shift
    // is used by a reassociable multiply or add, turn into a multiply.
    if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
        (I->hasOneUse() &&
         (isReassociableOp(I->user_back(), Instruction::Mul) ||
          isReassociableOp(I->user_back(), Instruction::Add)))) {
      Instruction *NI = ConvertShiftToMul(I);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    }

  // Commute binary operators, to canonicalize the order of their operands.
  // This can potentially expose more CSE opportunities, and makes writing
  // other transformations simpler.
  if (I->isCommutative())
    canonicalizeOperands(I);

  // Canonicalize negative constants out of expressions.
  if (Instruction *Res = canonicalizeNegFPConstants(I))
    I = Res;

  // Don't optimize floating-point instructions unless they have the
  // appropriate FastMathFlags for reassociation enabled.
  if (isa<FPMathOperator>(I) && !hasFPAssociativeFlags(I))
    return;

  // Do not reassociate boolean (i1) expressions.  We want to preserve the
  // original order of evaluation for short-circuited comparisons that
  // SimplifyCFG has folded to AND/OR expressions.  If the expression
  // is not further optimized, it is likely to be transformed back to a
  // short-circuited form for code gen, and the source order may have been
  // optimized for the most likely conditions.
  if (I->getType()->isIntegerTy(1))
    return;

  // If this is a bitwise or instruction of operands
  // with no common bits set, convert it to X+Y.
  if (I->getOpcode() == Instruction::Or &&
      shouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) &&
      (cast<PossiblyDisjointInst>(I)->isDisjoint() ||
       haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
                           SimplifyQuery(I->getDataLayout(),
                                         /*DT=*/nullptr, /*AC=*/nullptr, I)))) {
    Instruction *NI = convertOrWithNoCommonBitsToAdd(I);
    RedoInsts.insert(I);
    MadeChange = true;
    I = NI;
  }
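
  // For example, "or disjoint i32 %a, %b" (or an or whose operands provably
  // share no set bits) is rewritten as "add i32 %a, %b" so the add handling
  // below can reassociate it.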

  // If this is a subtract instruction which is not already in negate form,
  // see if we can convert it to X+-Y.
  if (I->getOpcode() == Instruction::Sub) {
    if (ShouldBreakUpSubtract(I)) {
      Instruction *NI = BreakUpSubtract(I, RedoInsts);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    } else if (match(I, m_Neg(m_Value()))) {
      // Otherwise, this is a negation.  See if the operand is a multiply tree
      // and if this is not an inner node of a multiply tree.
      if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
          (!I->hasOneUse() ||
           !isReassociableOp(I->user_back(), Instruction::Mul))) {
        Instruction *NI = LowerNegateToMultiply(I);
        // If the negate was simplified, revisit the users to see if we can
        // reassociate further.
        for (User *U : NI->users()) {
          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
            RedoInsts.insert(Tmp);
        }
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
      }
    }
  } else if (I->getOpcode() == Instruction::FNeg ||
             I->getOpcode() == Instruction::FSub) {
    if (ShouldBreakUpSubtract(I)) {
      Instruction *NI = BreakUpSubtract(I, RedoInsts);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
    } else if (match(I, m_FNeg(m_Value()))) {
      // Otherwise, this is a negation.  See if the operand is a multiply tree
      // and if this is not an inner node of a multiply tree.
      Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
                                           I->getOperand(0);
      if (isReassociableOp(Op, Instruction::FMul) &&
          (!I->hasOneUse() ||
           !isReassociableOp(I->user_back(), Instruction::FMul))) {
        // If the negate was simplified, revisit the users to see if we can
        // reassociate further.
        Instruction *NI = LowerNegateToMultiply(I);
        for (User *U : NI->users()) {
          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
            RedoInsts.insert(Tmp);
        }
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
      }
    }
  }
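
  // For example, BreakUpSubtract turns "sub i32 %x, %y" into a negation of
  // %y followed by an add of %x, so it can join an enclosing add tree.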

  // If this instruction is an associative binary operator, process it.
  if (!I->isAssociative()) return;
  BinaryOperator *BO = cast<BinaryOperator>(I);

  // If this is an interior node of a reassociable tree, ignore it until we
  // get to the root of the tree, to avoid N^2 analysis.
  unsigned Opcode = BO->getOpcode();
  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
    // During the initial run we will get to the root of the tree.
    // But if we get here while we are redoing instructions, there is no
    // guarantee that the root will be visited. So Redo later.
    if (BO->user_back() != BO &&
        BO->getParent() == BO->user_back()->getParent())
      RedoInsts.insert(BO->user_back());
    return;
  }

  // If this is an add tree that is used by a sub instruction, ignore it
  // until we process the subtract.
  if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
      cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
    return;
  if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
      cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
    return;

  ReassociateExpression(BO);
}

void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
  // First, walk the expression tree, linearizing the tree, collecting the
  // operand information.
  SmallVector<RepeatedValue, 8> Tree;
  OverflowTracking Flags;
  MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags);
  SmallVector<ValueEntry, 8> Ops;
  Ops.reserve(Tree.size());
  for (const RepeatedValue &E : Tree)
    Ops.append(E.second, ValueEntry(getRank(E.first), E.first));

  LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');

  // Now that we have linearized the tree to a list and have gathered all of
  // the operands and their ranks, sort the operands by their rank.  Use a
  // stable_sort so that values with equal ranks will have their relative
  // positions maintained (and so the compiler is deterministic).  Note that
  // this sorts so that the highest ranking values end up at the beginning of
  // the vector.
  llvm::stable_sort(Ops);

  // Now that we have the expression tree in a convenient
  // sorted form, optimize it globally if possible.
  if (Value *V = OptimizeExpression(I, Ops)) {
    if (V == I)
      // Self-referential expression in unreachable code.
      return;
    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
    I->replaceAllUsesWith(V);
    if (Instruction *VI = dyn_cast<Instruction>(V))
      if (I->getDebugLoc())
        VI->setDebugLoc(I->getDebugLoc());
    RedoInsts.insert(I);
    ++NumAnnihil;
    return;
  }
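
  // For example, if the whole tree was "x + -x", OptimizeExpression returns
  // the constant 0 above and every use of I is replaced with that scalar.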

  // We want to sink immediates as deeply as possible except in the case where
  // this is a multiply tree used only by an add, and the immediate is a -1.
  // In this case we reassociate to put the negation on the outside so that we
  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
  if (I->hasOneUse()) {
    if (I->getOpcode() == Instruction::Mul &&
        cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
        isa<ConstantInt>(Ops.back().Op) &&
        cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
      ValueEntry Tmp = Ops.pop_back_val();
      Ops.insert(Ops.begin(), Tmp);
    } else if (I->getOpcode() == Instruction::FMul &&
               cast<Instruction>(I->user_back())->getOpcode() ==
                   Instruction::FAdd &&
               isa<ConstantFP>(Ops.back().Op) &&
               cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
      ValueEntry Tmp = Ops.pop_back_val();
      Ops.insert(Ops.begin(), Tmp);
    }
  }

  LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');

  if (Ops.size() == 1) {
    if (Ops[0].Op == I)
      // Self-referential expression in unreachable code.
      return;

    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    I->replaceAllUsesWith(Ops[0].Op);
    if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
      OI->setDebugLoc(I->getDebugLoc());
    RedoInsts.insert(I);
    return;
  }

  if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
    // Find the pair with the highest count in the pairmap and move it to the
    // back of the list so that it can later be CSE'd.
    // example:
    //   a*b*c*d*e
    // if c*e is the most "popular" pair, we can express this as
    //   (((c*e)*d)*b)*a
    unsigned Max = 1;
    unsigned BestRank = 0;
    std::pair<unsigned, unsigned> BestPair;
    unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
    unsigned LimitIdx = 0;
    // With the CSE-driven heuristic, we are about to slap two values at the
    // beginning of the expression whereas they could live very late in the
    // CFG. When using the CSE-local heuristic we avoid creating dependences
    // from completely unrelated parts of the CFG by limiting the expression
    // reordering to the values that live in the first seen basic block.
    // The main idea is that we want to avoid forming expressions that would
    // become loop dependent.
    if (UseCSELocalOpt) {
      const BasicBlock *FirstSeenBB = nullptr;
      int StartIdx = Ops.size() - 1;
      // Skip the first value of the expression since we need at least two
      // values to materialize an expression. I.e., even if this value is
      // anchored in a different basic block, the actual first sub expression
      // will be anchored on the second value.
      for (int i = StartIdx - 1; i != -1; --i) {
        const Value *Val = Ops[i].Op;
        const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
        const BasicBlock *SeenBB = nullptr;
        if (!CurrLeafInstr) {
          // The value is free of any CFG dependencies.
          // Do as if it lives in the entry block.
          //
          // We do this to make sure all the values falling on this path are
          // seen through the same anchor point. The rationale is these values
          // can be combined together to form a sub expression free of any CFG
          // dependencies so we want them to stay together.
          // We could be cleverer and postpone the anchor down to the first
          // anchored value, but that's likely complicated to get right.
          // E.g., we wouldn't want to do that if that means being stuck in a
          // loop.
          //
          // For instance, we wouldn't want to change:
          // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
          // into
          // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
          // Because all the sub expressions with arg2..N would be stuck
          // between two loop dependent values.
          SeenBB = &I->getParent()->getParent()->getEntryBlock();
        } else {
          SeenBB = CurrLeafInstr->getParent();
        }

        if (!FirstSeenBB) {
          FirstSeenBB = SeenBB;
          continue;
        }
        if (FirstSeenBB != SeenBB) {
          // ith value is in a different basic block.
          // Rewind the index once to point to the last value on the same
          // basic block.
          LimitIdx = i + 1;
          LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
                            << LimitIdx << ", " << StartIdx << "]\n");
          break;
        }
      }
    }
    for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
      // We must use int type to go below zero when LimitIdx is 0.
      for (int j = i - 1; j >= (int)LimitIdx; --j) {
        unsigned Score = 0;
        Value *Op0 = Ops[i].Op;
        Value *Op1 = Ops[j].Op;
        if (std::less<Value *>()(Op1, Op0))
          std::swap(Op0, Op1);
        auto it = PairMap[Idx].find({Op0, Op1});
        if (it != PairMap[Idx].end()) {
          // Functions like BreakUpSubtract() can erase the Values we're using
          // as keys and create new Values after we built the PairMap. There's
          // a small chance that the new nodes can have the same address as
          // something already in the table. We shouldn't accumulate the
          // stored score in that case as it refers to the wrong Value.
          if (it->second.isValid())
            Score += it->second.Score;
        }

        unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);

        // By construction, the operands are sorted in reverse order of their
        // topological order.
        // So we tend to form (sub) expressions with values that are close to
        // each other.
        //
        // Now to expose more CSE opportunities we want to expose the pair of
        // operands that occur the most (as statically computed in
        // BuildPairMap.) as the first sub-expression.
        //
        // If two pairs occur as many times, we pick the one with the
        // lowest rank, meaning the one with both operands appearing first in
        // the topological order.
        if (Score > Max || (Score == Max && MaxRank < BestRank)) {
          BestPair = {j, i};
          Max = Score;
          BestRank = MaxRank;
        }
      }
    }
    if (Max > 1) {
      auto Op0 = Ops[BestPair.first];
      auto Op1 = Ops[BestPair.second];
      Ops.erase(&Ops[BestPair.second]);
      Ops.erase(&Ops[BestPair.first]);
      Ops.push_back(Op0);
      Ops.push_back(Op1);
    }
  }
  LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
             dbgs() << '\n');
  // Now that we ordered and optimized the expressions, splat them back into
  // the expression tree, removing any unneeded nodes.
  RewriteExprTree(I, Ops, Flags);
}

void
ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
  // Make a "pairmap" of how often each operand pair occurs.
  for (BasicBlock *BI : RPOT) {
    for (Instruction &I : *BI) {
      if (!I.isAssociative() || !I.isBinaryOp())
        continue;

      // Ignore nodes that aren't at the root of trees.
      if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
        continue;

      // Collect all operands in a single reassociable expression.
      // Since Reassociate has already been run once, we can assume things
      // are already canonical according to Reassociation's regime.
      SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) };
      SmallVector<Value *, 8> Ops;
      while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
        Value *Op = Worklist.pop_back_val();
        Instruction *OpI = dyn_cast<Instruction>(Op);
        if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) {
          Ops.push_back(Op);
          continue;
        }
        // Be paranoid about self-referencing expressions in unreachable code.
        if (OpI->getOperand(0) != OpI)
          Worklist.push_back(OpI->getOperand(0));
        if (OpI->getOperand(1) != OpI)
          Worklist.push_back(OpI->getOperand(1));
      }
      // Skip extremely long expressions.
      if (Ops.size() > GlobalReassociateLimit)
        continue;
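
      // For a fully collected expression such as a+b+c, Ops is {a, b, c} and
      // the nested loops below give (a,b), (a,c), and (b,c) a score of 1 each.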

      // Add all pairwise combinations of operands to the pair map.
      unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
      SmallSet<std::pair<Value *, Value *>, 32> Visited;
      for (unsigned i = 0; i < Ops.size() - 1; ++i) {
        for (unsigned j = i + 1; j < Ops.size(); ++j) {
          // Canonicalize operand orderings.
          Value *Op0 = Ops[i];
          Value *Op1 = Ops[j];
          if (std::less<Value *>()(Op1, Op0))
            std::swap(Op0, Op1);
          if (!Visited.insert({Op0, Op1}).second)
            continue;
          auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
          if (!res.second) {
            // If either key value has been erased then we've got the same
            // address by coincidence. That can't happen here because nothing
            // is erasing values but it can happen by the time we're querying
            // the map.
            assert(res.first->second.isValid() && "WeakVH invalidated");
            ++res.first->second.Score;
          }
        }
      }
    }
  }
}

PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
  // Get the function's basic blocks in Reverse Post Order. This order is used
  // by BuildRankMap to precalculate ranks correctly. It also excludes dead
  // basic blocks (it has been seen that the analysis in this pass could hang
  // when analysing dead basic blocks).
  ReversePostOrderTraversal<Function *> RPOT(&F);

  // Calculate the rank map for F.
  BuildRankMap(F, RPOT);

  // Build the pair map before running reassociate.
  // Technically this would be more accurate if we did it after one round
  // of reassociation, but in practice it doesn't seem to help much on
  // real-world code, so don't waste the compile time running reassociate
  // twice.
  // If a user wants, they could explicitly run reassociate twice in their
  // pass pipeline for further potential gains.
  // It might also be possible to update the pair map during runtime, but the
  // overhead of that may be large if there's many reassociable chains.
  BuildPairMap(RPOT);

  MadeChange = false;

  // Traverse the same blocks that were analysed by BuildRankMap.
  for (BasicBlock *BI : RPOT) {
    assert(RankMap.count(&*BI) && "BB should be ranked.");
    // Optimize every instruction in the basic block.
    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
      if (isInstructionTriviallyDead(&*II)) {
        EraseInst(&*II++);
      } else {
        OptimizeInst(&*II);
        assert(II->getParent() == &*BI && "Moved to a different block!");
        ++II;
      }

    // Make a copy of all the instructions to be redone so we can remove dead
    // instructions.
    OrderedSet ToRedo(RedoInsts);
    // Iterate over all instructions to be reevaluated and remove trivially
    // dead instructions. If any operand of the trivially dead instruction
    // becomes dead mark it for deletion as well. Continue this process until
    // all trivially dead instructions have been removed.
    while (!ToRedo.empty()) {
      Instruction *I = ToRedo.pop_back_val();
      if (isInstructionTriviallyDead(I)) {
        RecursivelyEraseDeadInsts(I, ToRedo);
        MadeChange = true;
      }
    }

    // Now that we have removed dead instructions, we can reoptimize the
    // remaining instructions.
    while (!RedoInsts.empty()) {
      Instruction *I = RedoInsts.front();
      RedoInsts.erase(RedoInsts.begin());
      if (isInstructionTriviallyDead(I))
        EraseInst(I);
      else
        OptimizeInst(I);
    }
  }

  // We are done with the rank map and pair map.
  RankMap.clear();
  ValueRankMap.clear();
  for (auto &Entry : PairMap)
    Entry.clear();

  if (MadeChange) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }

  return PreservedAnalyses::all();
}

namespace {

  class ReassociateLegacyPass : public FunctionPass {
    ReassociatePass Impl;

  public:
    static char ID; // Pass identification, replacement for typeid

    ReassociateLegacyPass() : FunctionPass(ID) {
      initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override {
      if (skipFunction(F))
        return false;

      FunctionAnalysisManager DummyFAM;
      auto PA = Impl.run(F, DummyFAM);
      return !PA.areAllPreserved();
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      AU.addPreserved<AAResultsWrapperPass>();
      AU.addPreserved<BasicAAWrapperPass>();
      AU.addPreserved<GlobalsAAWrapperPass>();
    }
  };

} // end anonymous namespace

char ReassociateLegacyPass::ID = 0;

INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
                "Reassociate expressions", false, false)

// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() {
  return new ReassociateLegacyPass();
}