1 //===- DemandedBits.cpp - Determine demanded bits -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass implements a demanded bits analysis. A demanded bit is one that
10 // contributes to a result; bits that are not demanded can be either zero or
11 // one without affecting control or data flow. For example in this sequence:
13 // %1 = add i32 %x, %y
14 // %2 = trunc i32 %1 to i16
16 // Only the lowest 16 bits of %1 are demanded; the rest are removed by the
19 //===----------------------------------------------------------------------===//
21 #include "llvm/Analysis/DemandedBits.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/Analysis/AssumptionCache.h"
26 #include "llvm/Analysis/ValueTracking.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/InstIterator.h"
33 #include "llvm/IR/InstrTypes.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Intrinsics.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Use.h"
43 #include "llvm/Pass.h"
44 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Support/KnownBits.h"
47 #include "llvm/Support/raw_ostream.h"
52 using namespace llvm::PatternMatch
;
54 #define DEBUG_TYPE "demanded-bits"
56 char DemandedBitsWrapperPass::ID
= 0;
58 INITIALIZE_PASS_BEGIN(DemandedBitsWrapperPass
, "demanded-bits",
59 "Demanded bits analysis", false, false)
60 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker
)
61 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
62 INITIALIZE_PASS_END(DemandedBitsWrapperPass
, "demanded-bits",
63 "Demanded bits analysis", false, false)
65 DemandedBitsWrapperPass::DemandedBitsWrapperPass() : FunctionPass(ID
) {
66 initializeDemandedBitsWrapperPassPass(*PassRegistry::getPassRegistry());
69 void DemandedBitsWrapperPass::getAnalysisUsage(AnalysisUsage
&AU
) const {
71 AU
.addRequired
<AssumptionCacheTracker
>();
72 AU
.addRequired
<DominatorTreeWrapperPass
>();
76 void DemandedBitsWrapperPass::print(raw_ostream
&OS
, const Module
*M
) const {
80 static bool isAlwaysLive(Instruction
*I
) {
81 return I
->isTerminator() || isa
<DbgInfoIntrinsic
>(I
) || I
->isEHPad() ||
82 I
->mayHaveSideEffects();
85 void DemandedBits::determineLiveOperandBits(
86 const Instruction
*UserI
, const Value
*Val
, unsigned OperandNo
,
87 const APInt
&AOut
, APInt
&AB
, KnownBits
&Known
, KnownBits
&Known2
,
88 bool &KnownBitsComputed
) {
89 unsigned BitWidth
= AB
.getBitWidth();
91 // We're called once per operand, but for some instructions, we need to
92 // compute known bits of both operands in order to determine the live bits of
93 // either (when both operands are instructions themselves). We don't,
94 // however, want to do this twice, so we cache the result in APInts that live
95 // in the caller. For the two-relevant-operands case, both operand values are
97 auto ComputeKnownBits
=
98 [&](unsigned BitWidth
, const Value
*V1
, const Value
*V2
) {
99 if (KnownBitsComputed
)
101 KnownBitsComputed
= true;
103 const DataLayout
&DL
= UserI
->getModule()->getDataLayout();
104 Known
= KnownBits(BitWidth
);
105 computeKnownBits(V1
, Known
, DL
, 0, &AC
, UserI
, &DT
);
108 Known2
= KnownBits(BitWidth
);
109 computeKnownBits(V2
, Known2
, DL
, 0, &AC
, UserI
, &DT
);
113 switch (UserI
->getOpcode()) {
115 case Instruction::Call
:
116 case Instruction::Invoke
:
117 if (const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(UserI
))
118 switch (II
->getIntrinsicID()) {
120 case Intrinsic::bswap
:
121 // The alive bits of the input are the swapped alive bits of
123 AB
= AOut
.byteSwap();
125 case Intrinsic::bitreverse
:
126 // The alive bits of the input are the reversed alive bits of
128 AB
= AOut
.reverseBits();
130 case Intrinsic::ctlz
:
131 if (OperandNo
== 0) {
132 // We need some output bits, so we need all bits of the
133 // input to the left of, and including, the leftmost bit
135 ComputeKnownBits(BitWidth
, Val
, nullptr);
136 AB
= APInt::getHighBitsSet(BitWidth
,
137 std::min(BitWidth
, Known
.countMaxLeadingZeros()+1));
140 case Intrinsic::cttz
:
141 if (OperandNo
== 0) {
142 // We need some output bits, so we need all bits of the
143 // input to the right of, and including, the rightmost bit
145 ComputeKnownBits(BitWidth
, Val
, nullptr);
146 AB
= APInt::getLowBitsSet(BitWidth
,
147 std::min(BitWidth
, Known
.countMaxTrailingZeros()+1));
150 case Intrinsic::fshl
:
151 case Intrinsic::fshr
: {
153 if (OperandNo
== 2) {
154 // Shift amount is modulo the bitwidth. For powers of two we have
155 // SA % BW == SA & (BW - 1).
156 if (isPowerOf2_32(BitWidth
))
158 } else if (match(II
->getOperand(2), m_APInt(SA
))) {
159 // Normalize to funnel shift left. APInt shifts of BitWidth are well-
160 // defined, so no need to special-case zero shifts here.
161 uint64_t ShiftAmt
= SA
->urem(BitWidth
);
162 if (II
->getIntrinsicID() == Intrinsic::fshr
)
163 ShiftAmt
= BitWidth
- ShiftAmt
;
166 AB
= AOut
.lshr(ShiftAmt
);
167 else if (OperandNo
== 1)
168 AB
= AOut
.shl(BitWidth
- ShiftAmt
);
174 case Instruction::Add
:
175 case Instruction::Sub
:
176 case Instruction::Mul
:
177 // Find the highest live output bit. We don't need any more input
178 // bits than that (adds, and thus subtracts, ripple only to the
180 AB
= APInt::getLowBitsSet(BitWidth
, AOut
.getActiveBits());
182 case Instruction::Shl
:
183 if (OperandNo
== 0) {
184 const APInt
*ShiftAmtC
;
185 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
186 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
187 AB
= AOut
.lshr(ShiftAmt
);
189 // If the shift is nuw/nsw, then the high bits are not dead
190 // (because we've promised that they *must* be zero).
191 const ShlOperator
*S
= cast
<ShlOperator
>(UserI
);
192 if (S
->hasNoSignedWrap())
193 AB
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
+1);
194 else if (S
->hasNoUnsignedWrap())
195 AB
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
);
199 case Instruction::LShr
:
200 if (OperandNo
== 0) {
201 const APInt
*ShiftAmtC
;
202 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
203 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
204 AB
= AOut
.shl(ShiftAmt
);
206 // If the shift is exact, then the low bits are not dead
207 // (they must be zero).
208 if (cast
<LShrOperator
>(UserI
)->isExact())
209 AB
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
213 case Instruction::AShr
:
214 if (OperandNo
== 0) {
215 const APInt
*ShiftAmtC
;
216 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
217 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
218 AB
= AOut
.shl(ShiftAmt
);
219 // Because the high input bit is replicated into the
220 // high-order bits of the result, if we need any of those
221 // bits, then we must keep the highest input bit.
222 if ((AOut
& APInt::getHighBitsSet(BitWidth
, ShiftAmt
))
226 // If the shift is exact, then the low bits are not dead
227 // (they must be zero).
228 if (cast
<AShrOperator
>(UserI
)->isExact())
229 AB
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
233 case Instruction::And
:
236 // For bits that are known zero, the corresponding bits in the
237 // other operand are dead (unless they're both zero, in which
238 // case they can't both be dead, so just mark the LHS bits as
240 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
244 AB
&= ~(Known
.Zero
& ~Known2
.Zero
);
246 case Instruction::Or
:
249 // For bits that are known one, the corresponding bits in the
250 // other operand are dead (unless they're both one, in which
251 // case they can't both be dead, so just mark the LHS bits as
253 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
257 AB
&= ~(Known
.One
& ~Known2
.One
);
259 case Instruction::Xor
:
260 case Instruction::PHI
:
263 case Instruction::Trunc
:
264 AB
= AOut
.zext(BitWidth
);
266 case Instruction::ZExt
:
267 AB
= AOut
.trunc(BitWidth
);
269 case Instruction::SExt
:
270 AB
= AOut
.trunc(BitWidth
);
271 // Because the high input bit is replicated into the
272 // high-order bits of the result, if we need any of those
273 // bits, then we must keep the highest input bit.
274 if ((AOut
& APInt::getHighBitsSet(AOut
.getBitWidth(),
275 AOut
.getBitWidth() - BitWidth
))
279 case Instruction::Select
:
283 case Instruction::ExtractElement
:
287 case Instruction::InsertElement
:
288 case Instruction::ShuffleVector
:
289 if (OperandNo
== 0 || OperandNo
== 1)
295 bool DemandedBitsWrapperPass::runOnFunction(Function
&F
) {
296 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
297 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
298 DB
.emplace(F
, AC
, DT
);
302 void DemandedBitsWrapperPass::releaseMemory() {
306 void DemandedBits::performAnalysis() {
308 // Analysis already completed for this function.
316 SmallSetVector
<Instruction
*, 16> Worklist
;
318 // Collect the set of "root" instructions that are known live.
319 for (Instruction
&I
: instructions(F
)) {
320 if (!isAlwaysLive(&I
))
323 LLVM_DEBUG(dbgs() << "DemandedBits: Root: " << I
<< "\n");
324 // For integer-valued instructions, set up an initial empty set of alive
325 // bits and add the instruction to the work list. For other instructions
326 // add their operands to the work list (for integer values operands, mark
327 // all bits as live).
328 Type
*T
= I
.getType();
329 if (T
->isIntOrIntVectorTy()) {
330 if (AliveBits
.try_emplace(&I
, T
->getScalarSizeInBits(), 0).second
)
336 // Non-integer-typed instructions...
337 for (Use
&OI
: I
.operands()) {
338 if (Instruction
*J
= dyn_cast
<Instruction
>(OI
)) {
339 Type
*T
= J
->getType();
340 if (T
->isIntOrIntVectorTy())
341 AliveBits
[J
] = APInt::getAllOnesValue(T
->getScalarSizeInBits());
345 // To save memory, we don't add I to the Visited set here. Instead, we
346 // check isAlwaysLive on every instruction when searching for dead
347 // instructions later (we need to check isAlwaysLive for the
348 // integer-typed instructions anyway).
351 // Propagate liveness backwards to operands.
352 while (!Worklist
.empty()) {
353 Instruction
*UserI
= Worklist
.pop_back_val();
355 LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI
);
357 if (UserI
->getType()->isIntOrIntVectorTy()) {
358 AOut
= AliveBits
[UserI
];
359 LLVM_DEBUG(dbgs() << " Alive Out: 0x"
360 << Twine::utohexstr(AOut
.getLimitedValue()));
362 LLVM_DEBUG(dbgs() << "\n");
364 if (!UserI
->getType()->isIntOrIntVectorTy())
365 Visited
.insert(UserI
);
367 KnownBits Known
, Known2
;
368 bool KnownBitsComputed
= false;
369 // Compute the set of alive bits for each operand. These are anded into the
370 // existing set, if any, and if that changes the set of alive bits, the
371 // operand is added to the work-list.
372 for (Use
&OI
: UserI
->operands()) {
373 // We also want to detect dead uses of arguments, but will only store
374 // demanded bits for instructions.
375 Instruction
*I
= dyn_cast
<Instruction
>(OI
);
376 if (!I
&& !isa
<Argument
>(OI
))
379 Type
*T
= OI
->getType();
380 if (T
->isIntOrIntVectorTy()) {
381 unsigned BitWidth
= T
->getScalarSizeInBits();
382 APInt AB
= APInt::getAllOnesValue(BitWidth
);
383 if (UserI
->getType()->isIntOrIntVectorTy() && !AOut
&&
384 !isAlwaysLive(UserI
)) {
385 // If all bits of the output are dead, then all bits of the input
387 AB
= APInt(BitWidth
, 0);
389 // Bits of each operand that are used to compute alive bits of the
390 // output are alive, all others are dead.
391 determineLiveOperandBits(UserI
, OI
, OI
.getOperandNo(), AOut
, AB
,
392 Known
, Known2
, KnownBitsComputed
);
394 // Keep track of uses which have no demanded bits.
395 if (AB
.isNullValue())
396 DeadUses
.insert(&OI
);
402 // If we've added to the set of alive bits (or the operand has not
403 // been previously visited), then re-queue the operand to be visited
405 APInt
ABPrev(BitWidth
, 0);
406 auto ABI
= AliveBits
.find(I
);
407 if (ABI
!= AliveBits
.end())
408 ABPrev
= ABI
->second
;
410 APInt ABNew
= AB
| ABPrev
;
411 if (ABNew
!= ABPrev
|| ABI
== AliveBits
.end()) {
412 AliveBits
[I
] = std::move(ABNew
);
416 } else if (I
&& !Visited
.count(I
)) {
423 APInt
DemandedBits::getDemandedBits(Instruction
*I
) {
426 auto Found
= AliveBits
.find(I
);
427 if (Found
!= AliveBits
.end())
428 return Found
->second
;
430 const DataLayout
&DL
= I
->getModule()->getDataLayout();
431 return APInt::getAllOnesValue(
432 DL
.getTypeSizeInBits(I
->getType()->getScalarType()));
435 bool DemandedBits::isInstructionDead(Instruction
*I
) {
438 return !Visited
.count(I
) && AliveBits
.find(I
) == AliveBits
.end() &&
442 bool DemandedBits::isUseDead(Use
*U
) {
443 // We only track integer uses, everything else is assumed live.
444 if (!(*U
)->getType()->isIntOrIntVectorTy())
447 // Uses by always-live instructions are never dead.
448 Instruction
*UserI
= cast
<Instruction
>(U
->getUser());
449 if (isAlwaysLive(UserI
))
453 if (DeadUses
.count(U
))
456 // If no output bits are demanded, no input bits are demanded and the use
457 // is dead. These uses might not be explicitly present in the DeadUses map.
458 if (UserI
->getType()->isIntOrIntVectorTy()) {
459 auto Found
= AliveBits
.find(UserI
);
460 if (Found
!= AliveBits
.end() && Found
->second
.isNullValue())
467 void DemandedBits::print(raw_ostream
&OS
) {
469 for (auto &KV
: AliveBits
) {
470 OS
<< "DemandedBits: 0x" << Twine::utohexstr(KV
.second
.getLimitedValue())
471 << " for " << *KV
.first
<< '\n';
475 FunctionPass
*llvm::createDemandedBitsWrapperPass() {
476 return new DemandedBitsWrapperPass();
479 AnalysisKey
DemandedBitsAnalysis::Key
;
481 DemandedBits
DemandedBitsAnalysis::run(Function
&F
,
482 FunctionAnalysisManager
&AM
) {
483 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
484 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
485 return DemandedBits(F
, AC
, DT
);
488 PreservedAnalyses
DemandedBitsPrinterPass::run(Function
&F
,
489 FunctionAnalysisManager
&AM
) {
490 AM
.getResult
<DemandedBitsAnalysis
>(F
).print(OS
);
491 return PreservedAnalyses::all();