1 //===- DemandedBits.cpp - Determine demanded bits -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass implements a demanded bits analysis. A demanded bit is one that
10 // contributes to a result; bits that are not demanded can be either zero or
11 // one without affecting control or data flow. For example in this sequence:
13 // %1 = add i32 %x, %y
14 // %2 = trunc i32 %1 to i16
16 // Only the lowest 16 bits of %1 are demanded; the rest are removed by the
19 //===----------------------------------------------------------------------===//
21 #include "llvm/Analysis/DemandedBits.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/Analysis/AssumptionCache.h"
26 #include "llvm/Analysis/ValueTracking.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/InstIterator.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Intrinsics.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Use.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/KnownBits.h"
48 #include "llvm/Support/raw_ostream.h"
53 using namespace llvm::PatternMatch
;
55 #define DEBUG_TYPE "demanded-bits"
57 char DemandedBitsWrapperPass::ID
= 0;
59 INITIALIZE_PASS_BEGIN(DemandedBitsWrapperPass
, "demanded-bits",
60 "Demanded bits analysis", false, false)
61 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
62 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
63 INITIALIZE_PASS_END(DemandedBitsWrapperPass
, "demanded-bits",
64 "Demanded bits analysis", false, false)
66 DemandedBitsWrapperPass::DemandedBitsWrapperPass() : FunctionPass(ID
) {
67 initializeDemandedBitsWrapperPassPass(*PassRegistry::getPassRegistry());
70 void DemandedBitsWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
72 AU
.addRequired
<AssumptionCacheTracker
>();
73 AU
.addRequired
<DominatorTreeWrapperPass
>();
77 void DemandedBitsWrapperPass::print(raw_ostream
&OS
, const Module
*M
) const {
81 static bool isAlwaysLive(Instruction
*I
) {
82 return I
->isTerminator() || isa
<DbgInfoIntrinsic
>(I
) || I
->isEHPad() ||
83 I
->mayHaveSideEffects();
86 void DemandedBits::determineLiveOperandBits(
87 const Instruction
*UserI
, const Value
*Val
, unsigned OperandNo
,
88 const APInt
&AOut
, APInt
&AB
, KnownBits
&Known
, KnownBits
&Known2
,
89 bool &KnownBitsComputed
) {
90 unsigned BitWidth
= AB
.getBitWidth();
92 // We're called once per operand, but for some instructions, we need to
93 // compute known bits of both operands in order to determine the live bits of
94 // either (when both operands are instructions themselves). We don't,
95 // however, want to do this twice, so we cache the result in APInts that live
96 // in the caller. For the two-relevant-operands case, both operand values are
98 auto ComputeKnownBits
=
99 [&](unsigned BitWidth
, const Value
*V1
, const Value
*V2
) {
100 if (KnownBitsComputed
)
102 KnownBitsComputed
= true;
104 const DataLayout
&DL
= UserI
->getModule()->getDataLayout();
105 Known
= KnownBits(BitWidth
);
106 computeKnownBits(V1
, Known
, DL
, 0, &AC
, UserI
, &DT
);
109 Known2
= KnownBits(BitWidth
);
110 computeKnownBits(V2
, Known2
, DL
, 0, &AC
, UserI
, &DT
);
114 switch (UserI
->getOpcode()) {
116 case Instruction::Call
:
117 case Instruction::Invoke
:
118 if (const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(UserI
)) {
119 switch (II
->getIntrinsicID()) {
121 case Intrinsic::bswap
:
122 // The alive bits of the input are the swapped alive bits of
124 AB
= AOut
.byteSwap();
126 case Intrinsic::bitreverse
:
127 // The alive bits of the input are the reversed alive bits of
129 AB
= AOut
.reverseBits();
131 case Intrinsic::ctlz
:
132 if (OperandNo
== 0) {
133 // We need some output bits, so we need all bits of the
134 // input to the left of, and including, the leftmost bit
136 ComputeKnownBits(BitWidth
, Val
, nullptr);
137 AB
= APInt::getHighBitsSet(BitWidth
,
138 std::min(BitWidth
, Known
.countMaxLeadingZeros()+1));
141 case Intrinsic::cttz
:
142 if (OperandNo
== 0) {
143 // We need some output bits, so we need all bits of the
144 // input to the right of, and including, the rightmost bit
146 ComputeKnownBits(BitWidth
, Val
, nullptr);
147 AB
= APInt::getLowBitsSet(BitWidth
,
148 std::min(BitWidth
, Known
.countMaxTrailingZeros()+1));
151 case Intrinsic::fshl
:
152 case Intrinsic::fshr
: {
154 if (OperandNo
== 2) {
155 // Shift amount is modulo the bitwidth. For powers of two we have
156 // SA % BW == SA & (BW - 1).
157 if (isPowerOf2_32(BitWidth
))
159 } else if (match(II
->getOperand(2), m_APInt(SA
))) {
160 // Normalize to funnel shift left. APInt shifts of BitWidth are well-
161 // defined, so no need to special-case zero shifts here.
162 uint64_t ShiftAmt
= SA
->urem(BitWidth
);
163 if (II
->getIntrinsicID() == Intrinsic::fshr
)
164 ShiftAmt
= BitWidth
- ShiftAmt
;
167 AB
= AOut
.lshr(ShiftAmt
);
168 else if (OperandNo
== 1)
169 AB
= AOut
.shl(BitWidth
- ShiftAmt
);
173 case Intrinsic::umax
:
174 case Intrinsic::umin
:
175 case Intrinsic::smax
:
176 case Intrinsic::smin
:
177 // If low bits of result are not demanded, they are also not demanded
178 // for the min/max operands.
179 AB
= APInt::getBitsSetFrom(BitWidth
, AOut
.countTrailingZeros());
184 case Instruction::Add
:
188 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
189 AB
= determineLiveOperandBitsAdd(OperandNo
, AOut
, Known
, Known2
);
192 case Instruction::Sub
:
196 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
197 AB
= determineLiveOperandBitsSub(OperandNo
, AOut
, Known
, Known2
);
200 case Instruction::Mul
:
201 // Find the highest live output bit. We don't need any more input
202 // bits than that (adds, and thus subtracts, ripple only to the
204 AB
= APInt::getLowBitsSet(BitWidth
, AOut
.getActiveBits());
206 case Instruction::Shl
:
207 if (OperandNo
== 0) {
208 const APInt
*ShiftAmtC
;
209 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
210 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
211 AB
= AOut
.lshr(ShiftAmt
);
213 // If the shift is nuw/nsw, then the high bits are not dead
214 // (because we've promised that they *must* be zero).
215 const ShlOperator
*S
= cast
<ShlOperator
>(UserI
);
216 if (S
->hasNoSignedWrap())
217 AB
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
+1);
218 else if (S
->hasNoUnsignedWrap())
219 AB
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
);
223 case Instruction::LShr
:
224 if (OperandNo
== 0) {
225 const APInt
*ShiftAmtC
;
226 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
227 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
228 AB
= AOut
.shl(ShiftAmt
);
230 // If the shift is exact, then the low bits are not dead
231 // (they must be zero).
232 if (cast
<LShrOperator
>(UserI
)->isExact())
233 AB
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
237 case Instruction::AShr
:
238 if (OperandNo
== 0) {
239 const APInt
*ShiftAmtC
;
240 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
241 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
242 AB
= AOut
.shl(ShiftAmt
);
243 // Because the high input bit is replicated into the
244 // high-order bits of the result, if we need any of those
245 // bits, then we must keep the highest input bit.
246 if ((AOut
& APInt::getHighBitsSet(BitWidth
, ShiftAmt
))
250 // If the shift is exact, then the low bits are not dead
251 // (they must be zero).
252 if (cast
<AShrOperator
>(UserI
)->isExact())
253 AB
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
257 case Instruction::And
:
260 // For bits that are known zero, the corresponding bits in the
261 // other operand are dead (unless they're both zero, in which
262 // case they can't both be dead, so just mark the LHS bits as
264 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
268 AB
&= ~(Known
.Zero
& ~Known2
.Zero
);
270 case Instruction::Or
:
273 // For bits that are known one, the corresponding bits in the
274 // other operand are dead (unless they're both one, in which
275 // case they can't both be dead, so just mark the LHS bits as
277 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
281 AB
&= ~(Known
.One
& ~Known2
.One
);
283 case Instruction::Xor
:
284 case Instruction::PHI
:
287 case Instruction::Trunc
:
288 AB
= AOut
.zext(BitWidth
);
290 case Instruction::ZExt
:
291 AB
= AOut
.trunc(BitWidth
);
293 case Instruction::SExt
:
294 AB
= AOut
.trunc(BitWidth
);
295 // Because the high input bit is replicated into the
296 // high-order bits of the result, if we need any of those
297 // bits, then we must keep the highest input bit.
298 if ((AOut
& APInt::getHighBitsSet(AOut
.getBitWidth(),
299 AOut
.getBitWidth() - BitWidth
))
303 case Instruction::Select
:
307 case Instruction::ExtractElement
:
311 case Instruction::InsertElement
:
312 case Instruction::ShuffleVector
:
313 if (OperandNo
== 0 || OperandNo
== 1)
319 bool DemandedBitsWrapperPass::runOnFunction(Function
&F
) {
320 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
321 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
322 DB
.emplace(F
, AC
, DT
);
326 void DemandedBitsWrapperPass::releaseMemory() {
330 void DemandedBits::performAnalysis() {
332 // Analysis already completed for this function.
340 SmallSetVector
<Instruction
*, 16> Worklist
;
342 // Collect the set of "root" instructions that are known live.
343 for (Instruction
&I
: instructions(F
)) {
344 if (!isAlwaysLive(&I
))
347 LLVM_DEBUG(dbgs() << "DemandedBits: Root: " << I
<< "\n");
348 // For integer-valued instructions, set up an initial empty set of alive
349 // bits and add the instruction to the work list. For other instructions
350 // add their operands to the work list (for integer values operands, mark
351 // all bits as live).
352 Type
*T
= I
.getType();
353 if (T
->isIntOrIntVectorTy()) {
354 if (AliveBits
.try_emplace(&I
, T
->getScalarSizeInBits(), 0).second
)
360 // Non-integer-typed instructions...
361 for (Use
&OI
: I
.operands()) {
362 if (Instruction
*J
= dyn_cast
<Instruction
>(OI
)) {
363 Type
*T
= J
->getType();
364 if (T
->isIntOrIntVectorTy())
365 AliveBits
[J
] = APInt::getAllOnesValue(T
->getScalarSizeInBits());
371 // To save memory, we don't add I to the Visited set here. Instead, we
372 // check isAlwaysLive on every instruction when searching for dead
373 // instructions later (we need to check isAlwaysLive for the
374 // integer-typed instructions anyway).
377 // Propagate liveness backwards to operands.
378 while (!Worklist
.empty()) {
379 Instruction
*UserI
= Worklist
.pop_back_val();
381 LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI
);
383 bool InputIsKnownDead
= false;
384 if (UserI
->getType()->isIntOrIntVectorTy()) {
385 AOut
= AliveBits
[UserI
];
386 LLVM_DEBUG(dbgs() << " Alive Out: 0x"
387 << Twine::utohexstr(AOut
.getLimitedValue()));
389 // If all bits of the output are dead, then all bits of the input
391 InputIsKnownDead
= !AOut
&& !isAlwaysLive(UserI
);
393 LLVM_DEBUG(dbgs() << "\n");
395 KnownBits Known
, Known2
;
396 bool KnownBitsComputed
= false;
397 // Compute the set of alive bits for each operand. These are anded into the
398 // existing set, if any, and if that changes the set of alive bits, the
399 // operand is added to the work-list.
400 for (Use
&OI
: UserI
->operands()) {
401 // We also want to detect dead uses of arguments, but will only store
402 // demanded bits for instructions.
403 Instruction
*I
= dyn_cast
<Instruction
>(OI
);
404 if (!I
&& !isa
<Argument
>(OI
))
407 Type
*T
= OI
->getType();
408 if (T
->isIntOrIntVectorTy()) {
409 unsigned BitWidth
= T
->getScalarSizeInBits();
410 APInt AB
= APInt::getAllOnesValue(BitWidth
);
411 if (InputIsKnownDead
) {
412 AB
= APInt(BitWidth
, 0);
414 // Bits of each operand that are used to compute alive bits of the
415 // output are alive, all others are dead.
416 determineLiveOperandBits(UserI
, OI
, OI
.getOperandNo(), AOut
, AB
,
417 Known
, Known2
, KnownBitsComputed
);
419 // Keep track of uses which have no demanded bits.
420 if (AB
.isNullValue())
421 DeadUses
.insert(&OI
);
427 // If we've added to the set of alive bits (or the operand has not
428 // been previously visited), then re-queue the operand to be visited
430 auto Res
= AliveBits
.try_emplace(I
);
431 if (Res
.second
|| (AB
|= Res
.first
->second
) != Res
.first
->second
) {
432 Res
.first
->second
= std::move(AB
);
436 } else if (I
&& Visited
.insert(I
).second
) {
443 APInt
DemandedBits::getDemandedBits(Instruction
*I
) {
446 auto Found
= AliveBits
.find(I
);
447 if (Found
!= AliveBits
.end())
448 return Found
->second
;
450 const DataLayout
&DL
= I
->getModule()->getDataLayout();
451 return APInt::getAllOnesValue(
452 DL
.getTypeSizeInBits(I
->getType()->getScalarType()));
455 APInt
DemandedBits::getDemandedBits(Use
*U
) {
456 Type
*T
= (*U
)->getType();
457 Instruction
*UserI
= cast
<Instruction
>(U
->getUser());
458 const DataLayout
&DL
= UserI
->getModule()->getDataLayout();
459 unsigned BitWidth
= DL
.getTypeSizeInBits(T
->getScalarType());
461 // We only track integer uses, everything else produces a mask with all bits
463 if (!T
->isIntOrIntVectorTy())
464 return APInt::getAllOnesValue(BitWidth
);
467 return APInt(BitWidth
, 0);
471 APInt AOut
= getDemandedBits(UserI
);
472 APInt AB
= APInt::getAllOnesValue(BitWidth
);
473 KnownBits Known
, Known2
;
474 bool KnownBitsComputed
= false;
476 determineLiveOperandBits(UserI
, *U
, U
->getOperandNo(), AOut
, AB
, Known
,
477 Known2
, KnownBitsComputed
);
482 bool DemandedBits::isInstructionDead(Instruction
*I
) {
485 return !Visited
.count(I
) && AliveBits
.find(I
) == AliveBits
.end() &&
489 bool DemandedBits::isUseDead(Use
*U
) {
490 // We only track integer uses, everything else is assumed live.
491 if (!(*U
)->getType()->isIntOrIntVectorTy())
494 // Uses by always-live instructions are never dead.
495 Instruction
*UserI
= cast
<Instruction
>(U
->getUser());
496 if (isAlwaysLive(UserI
))
500 if (DeadUses
.count(U
))
503 // If no output bits are demanded, no input bits are demanded and the use
504 // is dead. These uses might not be explicitly present in the DeadUses map.
505 if (UserI
->getType()->isIntOrIntVectorTy()) {
506 auto Found
= AliveBits
.find(UserI
);
507 if (Found
!= AliveBits
.end() && Found
->second
.isNullValue())
514 void DemandedBits::print(raw_ostream
&OS
) {
515 auto PrintDB
= [&](const Instruction
*I
, const APInt
&A
, Value
*V
= nullptr) {
516 OS
<< "DemandedBits: 0x" << Twine::utohexstr(A
.getLimitedValue())
519 V
->printAsOperand(OS
, false);
526 for (auto &KV
: AliveBits
) {
527 Instruction
*I
= KV
.first
;
528 PrintDB(I
, KV
.second
);
530 for (Use
&OI
: I
->operands()) {
531 PrintDB(I
, getDemandedBits(&OI
), OI
);
536 static APInt
determineLiveOperandBitsAddCarry(unsigned OperandNo
,
538 const KnownBits
&LHS
,
539 const KnownBits
&RHS
,
540 bool CarryZero
, bool CarryOne
) {
541 assert(!(CarryZero
&& CarryOne
) &&
542 "Carry can't be zero and one at the same time");
544 // The following check should be done by the caller, as it also indicates
545 // that LHS and RHS don't need to be computed.
547 // if (AOut.isMask())
550 // Boundary bits' carry out is unaffected by their carry in.
551 APInt Bound
= (LHS
.Zero
& RHS
.Zero
) | (LHS
.One
& RHS
.One
);
553 // First, the alive carry bits are determined from the alive output bits:
554 // Let demand ripple to the right but only up to any set bit in Bound.
557 // ACarry&~AOut = --111-
558 APInt RBound
= Bound
.reverseBits();
559 APInt RAOut
= AOut
.reverseBits();
560 APInt RProp
= RAOut
+ (RAOut
| ~RBound
);
561 APInt RACarry
= RProp
^ ~RBound
;
562 APInt ACarry
= RACarry
.reverseBits();
564 // Then, the alive input bits are determined from the alive carry bits:
565 APInt NeededToMaintainCarryZero
;
566 APInt NeededToMaintainCarryOne
;
567 if (OperandNo
== 0) {
568 NeededToMaintainCarryZero
= LHS
.Zero
| ~RHS
.Zero
;
569 NeededToMaintainCarryOne
= LHS
.One
| ~RHS
.One
;
571 NeededToMaintainCarryZero
= RHS
.Zero
| ~LHS
.Zero
;
572 NeededToMaintainCarryOne
= RHS
.One
| ~LHS
.One
;
575 // As in computeForAddCarry
576 APInt PossibleSumZero
= ~LHS
.Zero
+ ~RHS
.Zero
+ !CarryZero
;
577 APInt PossibleSumOne
= LHS
.One
+ RHS
.One
+ CarryOne
;
579 // The below is simplified from
581 // APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero);
582 // APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One;
583 // APInt CarryUnknown = ~(CarryKnownZero | CarryKnownOne);
585 // APInt NeededToMaintainCarry =
586 // (CarryKnownZero & NeededToMaintainCarryZero) |
587 // (CarryKnownOne & NeededToMaintainCarryOne) |
590 APInt NeededToMaintainCarry
= (~PossibleSumZero
| NeededToMaintainCarryZero
) &
591 (PossibleSumOne
| NeededToMaintainCarryOne
);
593 APInt AB
= AOut
| (ACarry
& NeededToMaintainCarry
);
597 APInt
DemandedBits::determineLiveOperandBitsAdd(unsigned OperandNo
,
599 const KnownBits
&LHS
,
600 const KnownBits
&RHS
) {
601 return determineLiveOperandBitsAddCarry(OperandNo
, AOut
, LHS
, RHS
, true,
605 APInt
DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo
,
607 const KnownBits
&LHS
,
608 const KnownBits
&RHS
) {
612 return determineLiveOperandBitsAddCarry(OperandNo
, AOut
, LHS
, NRHS
, false,
616 FunctionPass
*llvm::createDemandedBitsWrapperPass() {
617 return new DemandedBitsWrapperPass();
620 AnalysisKey
DemandedBitsAnalysis::Key
;
622 DemandedBits
DemandedBitsAnalysis::run(Function
&F
,
623 FunctionAnalysisManager
&AM
) {
624 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
625 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
626 return DemandedBits(F
, AC
, DT
);
629 PreservedAnalyses
DemandedBitsPrinterPass::run(Function
&F
,
630 FunctionAnalysisManager
&AM
) {
631 AM
.getResult
<DemandedBitsAnalysis
>(F
).print(OS
);
632 return PreservedAnalyses::all();