1 //===- DemandedBits.cpp - Determine demanded bits -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass implements a demanded bits analysis. A demanded bit is one that
10 // contributes to a result; bits that are not demanded can be either zero or
11 // one without affecting control or data flow. For example in this sequence:
13 // %1 = add i32 %x, %y
14 // %2 = trunc i32 %1 to i16
16 // Only the lowest 16 bits of %1 are demanded; the rest are removed by the
19 //===----------------------------------------------------------------------===//
21 #include "llvm/Analysis/DemandedBits.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/Analysis/AssumptionCache.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/InstIterator.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Operator.h"
33 #include "llvm/IR/PassManager.h"
34 #include "llvm/IR/PatternMatch.h"
35 #include "llvm/IR/Type.h"
36 #include "llvm/IR/Use.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/KnownBits.h"
40 #include "llvm/Support/raw_ostream.h"
45 using namespace llvm::PatternMatch
;
47 #define DEBUG_TYPE "demanded-bits"
49 static bool isAlwaysLive(Instruction
*I
) {
50 return I
->isTerminator() || isa
<DbgInfoIntrinsic
>(I
) || I
->isEHPad() ||
51 I
->mayHaveSideEffects();
54 void DemandedBits::determineLiveOperandBits(
55 const Instruction
*UserI
, const Value
*Val
, unsigned OperandNo
,
56 const APInt
&AOut
, APInt
&AB
, KnownBits
&Known
, KnownBits
&Known2
,
57 bool &KnownBitsComputed
) {
58 unsigned BitWidth
= AB
.getBitWidth();
60 // We're called once per operand, but for some instructions, we need to
61 // compute known bits of both operands in order to determine the live bits of
62 // either (when both operands are instructions themselves). We don't,
63 // however, want to do this twice, so we cache the result in APInts that live
64 // in the caller. For the two-relevant-operands case, both operand values are
66 auto ComputeKnownBits
=
67 [&](unsigned BitWidth
, const Value
*V1
, const Value
*V2
) {
68 if (KnownBitsComputed
)
70 KnownBitsComputed
= true;
72 const DataLayout
&DL
= UserI
->getModule()->getDataLayout();
73 Known
= KnownBits(BitWidth
);
74 computeKnownBits(V1
, Known
, DL
, 0, &AC
, UserI
, &DT
);
77 Known2
= KnownBits(BitWidth
);
78 computeKnownBits(V2
, Known2
, DL
, 0, &AC
, UserI
, &DT
);
82 switch (UserI
->getOpcode()) {
84 case Instruction::Call
:
85 case Instruction::Invoke
:
86 if (const auto *II
= dyn_cast
<IntrinsicInst
>(UserI
)) {
87 switch (II
->getIntrinsicID()) {
89 case Intrinsic::bswap
:
90 // The alive bits of the input are the swapped alive bits of
94 case Intrinsic::bitreverse
:
95 // The alive bits of the input are the reversed alive bits of
97 AB
= AOut
.reverseBits();
100 if (OperandNo
== 0) {
101 // We need some output bits, so we need all bits of the
102 // input to the left of, and including, the leftmost bit
104 ComputeKnownBits(BitWidth
, Val
, nullptr);
105 AB
= APInt::getHighBitsSet(BitWidth
,
106 std::min(BitWidth
, Known
.countMaxLeadingZeros()+1));
109 case Intrinsic::cttz
:
110 if (OperandNo
== 0) {
111 // We need some output bits, so we need all bits of the
112 // input to the right of, and including, the rightmost bit
114 ComputeKnownBits(BitWidth
, Val
, nullptr);
115 AB
= APInt::getLowBitsSet(BitWidth
,
116 std::min(BitWidth
, Known
.countMaxTrailingZeros()+1));
119 case Intrinsic::fshl
:
120 case Intrinsic::fshr
: {
122 if (OperandNo
== 2) {
123 // Shift amount is modulo the bitwidth. For powers of two we have
124 // SA % BW == SA & (BW - 1).
125 if (isPowerOf2_32(BitWidth
))
127 } else if (match(II
->getOperand(2), m_APInt(SA
))) {
128 // Normalize to funnel shift left. APInt shifts of BitWidth are well-
129 // defined, so no need to special-case zero shifts here.
130 uint64_t ShiftAmt
= SA
->urem(BitWidth
);
131 if (II
->getIntrinsicID() == Intrinsic::fshr
)
132 ShiftAmt
= BitWidth
- ShiftAmt
;
135 AB
= AOut
.lshr(ShiftAmt
);
136 else if (OperandNo
== 1)
137 AB
= AOut
.shl(BitWidth
- ShiftAmt
);
141 case Intrinsic::umax
:
142 case Intrinsic::umin
:
143 case Intrinsic::smax
:
144 case Intrinsic::smin
:
145 // If low bits of result are not demanded, they are also not demanded
146 // for the min/max operands.
147 AB
= APInt::getBitsSetFrom(BitWidth
, AOut
.countr_zero());
152 case Instruction::Add
:
156 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
157 AB
= determineLiveOperandBitsAdd(OperandNo
, AOut
, Known
, Known2
);
160 case Instruction::Sub
:
164 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
165 AB
= determineLiveOperandBitsSub(OperandNo
, AOut
, Known
, Known2
);
168 case Instruction::Mul
:
169 // Find the highest live output bit. We don't need any more input
170 // bits than that (adds, and thus subtracts, ripple only to the
172 AB
= APInt::getLowBitsSet(BitWidth
, AOut
.getActiveBits());
174 case Instruction::Shl
:
175 if (OperandNo
== 0) {
176 const APInt
*ShiftAmtC
;
177 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
178 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
179 AB
= AOut
.lshr(ShiftAmt
);
181 // If the shift is nuw/nsw, then the high bits are not dead
182 // (because we've promised that they *must* be zero).
183 const auto *S
= cast
<ShlOperator
>(UserI
);
184 if (S
->hasNoSignedWrap())
185 AB
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
+1);
186 else if (S
->hasNoUnsignedWrap())
187 AB
|= APInt::getHighBitsSet(BitWidth
, ShiftAmt
);
191 case Instruction::LShr
:
192 if (OperandNo
== 0) {
193 const APInt
*ShiftAmtC
;
194 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
195 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
196 AB
= AOut
.shl(ShiftAmt
);
198 // If the shift is exact, then the low bits are not dead
199 // (they must be zero).
200 if (cast
<LShrOperator
>(UserI
)->isExact())
201 AB
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
205 case Instruction::AShr
:
206 if (OperandNo
== 0) {
207 const APInt
*ShiftAmtC
;
208 if (match(UserI
->getOperand(1), m_APInt(ShiftAmtC
))) {
209 uint64_t ShiftAmt
= ShiftAmtC
->getLimitedValue(BitWidth
- 1);
210 AB
= AOut
.shl(ShiftAmt
);
211 // Because the high input bit is replicated into the
212 // high-order bits of the result, if we need any of those
213 // bits, then we must keep the highest input bit.
214 if ((AOut
& APInt::getHighBitsSet(BitWidth
, ShiftAmt
))
218 // If the shift is exact, then the low bits are not dead
219 // (they must be zero).
220 if (cast
<AShrOperator
>(UserI
)->isExact())
221 AB
|= APInt::getLowBitsSet(BitWidth
, ShiftAmt
);
225 case Instruction::And
:
228 // For bits that are known zero, the corresponding bits in the
229 // other operand are dead (unless they're both zero, in which
230 // case they can't both be dead, so just mark the LHS bits as
232 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
236 AB
&= ~(Known
.Zero
& ~Known2
.Zero
);
238 case Instruction::Or
:
241 // For bits that are known one, the corresponding bits in the
242 // other operand are dead (unless they're both one, in which
243 // case they can't both be dead, so just mark the LHS bits as
245 ComputeKnownBits(BitWidth
, UserI
->getOperand(0), UserI
->getOperand(1));
249 AB
&= ~(Known
.One
& ~Known2
.One
);
251 case Instruction::Xor
:
252 case Instruction::PHI
:
255 case Instruction::Trunc
:
256 AB
= AOut
.zext(BitWidth
);
258 case Instruction::ZExt
:
259 AB
= AOut
.trunc(BitWidth
);
261 case Instruction::SExt
:
262 AB
= AOut
.trunc(BitWidth
);
263 // Because the high input bit is replicated into the
264 // high-order bits of the result, if we need any of those
265 // bits, then we must keep the highest input bit.
266 if ((AOut
& APInt::getHighBitsSet(AOut
.getBitWidth(),
267 AOut
.getBitWidth() - BitWidth
))
271 case Instruction::Select
:
275 case Instruction::ExtractElement
:
279 case Instruction::InsertElement
:
280 case Instruction::ShuffleVector
:
281 if (OperandNo
== 0 || OperandNo
== 1)
287 void DemandedBits::performAnalysis() {
289 // Analysis already completed for this function.
297 SmallSetVector
<Instruction
*, 16> Worklist
;
299 // Collect the set of "root" instructions that are known live.
300 for (Instruction
&I
: instructions(F
)) {
301 if (!isAlwaysLive(&I
))
304 LLVM_DEBUG(dbgs() << "DemandedBits: Root: " << I
<< "\n");
305 // For integer-valued instructions, set up an initial empty set of alive
306 // bits and add the instruction to the work list. For other instructions
307 // add their operands to the work list (for integer values operands, mark
308 // all bits as live).
309 Type
*T
= I
.getType();
310 if (T
->isIntOrIntVectorTy()) {
311 if (AliveBits
.try_emplace(&I
, T
->getScalarSizeInBits(), 0).second
)
317 // Non-integer-typed instructions...
318 for (Use
&OI
: I
.operands()) {
319 if (auto *J
= dyn_cast
<Instruction
>(OI
)) {
320 Type
*T
= J
->getType();
321 if (T
->isIntOrIntVectorTy())
322 AliveBits
[J
] = APInt::getAllOnes(T
->getScalarSizeInBits());
328 // To save memory, we don't add I to the Visited set here. Instead, we
329 // check isAlwaysLive on every instruction when searching for dead
330 // instructions later (we need to check isAlwaysLive for the
331 // integer-typed instructions anyway).
334 // Propagate liveness backwards to operands.
335 while (!Worklist
.empty()) {
336 Instruction
*UserI
= Worklist
.pop_back_val();
338 LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI
);
340 bool InputIsKnownDead
= false;
341 if (UserI
->getType()->isIntOrIntVectorTy()) {
342 AOut
= AliveBits
[UserI
];
343 LLVM_DEBUG(dbgs() << " Alive Out: 0x"
344 << Twine::utohexstr(AOut
.getLimitedValue()));
346 // If all bits of the output are dead, then all bits of the input
348 InputIsKnownDead
= !AOut
&& !isAlwaysLive(UserI
);
350 LLVM_DEBUG(dbgs() << "\n");
352 KnownBits Known
, Known2
;
353 bool KnownBitsComputed
= false;
354 // Compute the set of alive bits for each operand. These are anded into the
355 // existing set, if any, and if that changes the set of alive bits, the
356 // operand is added to the work-list.
357 for (Use
&OI
: UserI
->operands()) {
358 // We also want to detect dead uses of arguments, but will only store
359 // demanded bits for instructions.
360 auto *I
= dyn_cast
<Instruction
>(OI
);
361 if (!I
&& !isa
<Argument
>(OI
))
364 Type
*T
= OI
->getType();
365 if (T
->isIntOrIntVectorTy()) {
366 unsigned BitWidth
= T
->getScalarSizeInBits();
367 APInt AB
= APInt::getAllOnes(BitWidth
);
368 if (InputIsKnownDead
) {
369 AB
= APInt(BitWidth
, 0);
371 // Bits of each operand that are used to compute alive bits of the
372 // output are alive, all others are dead.
373 determineLiveOperandBits(UserI
, OI
, OI
.getOperandNo(), AOut
, AB
,
374 Known
, Known2
, KnownBitsComputed
);
376 // Keep track of uses which have no demanded bits.
378 DeadUses
.insert(&OI
);
384 // If we've added to the set of alive bits (or the operand has not
385 // been previously visited), then re-queue the operand to be visited
387 auto Res
= AliveBits
.try_emplace(I
);
388 if (Res
.second
|| (AB
|= Res
.first
->second
) != Res
.first
->second
) {
389 Res
.first
->second
= std::move(AB
);
393 } else if (I
&& Visited
.insert(I
).second
) {
400 APInt
DemandedBits::getDemandedBits(Instruction
*I
) {
403 auto Found
= AliveBits
.find(I
);
404 if (Found
!= AliveBits
.end())
405 return Found
->second
;
407 const DataLayout
&DL
= I
->getModule()->getDataLayout();
408 return APInt::getAllOnes(DL
.getTypeSizeInBits(I
->getType()->getScalarType()));
411 APInt
DemandedBits::getDemandedBits(Use
*U
) {
412 Type
*T
= (*U
)->getType();
413 auto *UserI
= cast
<Instruction
>(U
->getUser());
414 const DataLayout
&DL
= UserI
->getModule()->getDataLayout();
415 unsigned BitWidth
= DL
.getTypeSizeInBits(T
->getScalarType());
417 // We only track integer uses, everything else produces a mask with all bits
419 if (!T
->isIntOrIntVectorTy())
420 return APInt::getAllOnes(BitWidth
);
423 return APInt(BitWidth
, 0);
427 APInt AOut
= getDemandedBits(UserI
);
428 APInt AB
= APInt::getAllOnes(BitWidth
);
429 KnownBits Known
, Known2
;
430 bool KnownBitsComputed
= false;
432 determineLiveOperandBits(UserI
, *U
, U
->getOperandNo(), AOut
, AB
, Known
,
433 Known2
, KnownBitsComputed
);
438 bool DemandedBits::isInstructionDead(Instruction
*I
) {
441 return !Visited
.count(I
) && !AliveBits
.contains(I
) && !isAlwaysLive(I
);
444 bool DemandedBits::isUseDead(Use
*U
) {
445 // We only track integer uses, everything else is assumed live.
446 if (!(*U
)->getType()->isIntOrIntVectorTy())
449 // Uses by always-live instructions are never dead.
450 auto *UserI
= cast
<Instruction
>(U
->getUser());
451 if (isAlwaysLive(UserI
))
455 if (DeadUses
.count(U
))
458 // If no output bits are demanded, no input bits are demanded and the use
459 // is dead. These uses might not be explicitly present in the DeadUses map.
460 if (UserI
->getType()->isIntOrIntVectorTy()) {
461 auto Found
= AliveBits
.find(UserI
);
462 if (Found
!= AliveBits
.end() && Found
->second
.isZero())
469 void DemandedBits::print(raw_ostream
&OS
) {
470 auto PrintDB
= [&](const Instruction
*I
, const APInt
&A
, Value
*V
= nullptr) {
471 OS
<< "DemandedBits: 0x" << Twine::utohexstr(A
.getLimitedValue())
474 V
->printAsOperand(OS
, false);
480 OS
<< "Printing analysis 'Demanded Bits Analysis' for function '" << F
.getName() << "':\n";
482 for (auto &KV
: AliveBits
) {
483 Instruction
*I
= KV
.first
;
484 PrintDB(I
, KV
.second
);
486 for (Use
&OI
: I
->operands()) {
487 PrintDB(I
, getDemandedBits(&OI
), OI
);
492 static APInt
determineLiveOperandBitsAddCarry(unsigned OperandNo
,
494 const KnownBits
&LHS
,
495 const KnownBits
&RHS
,
496 bool CarryZero
, bool CarryOne
) {
497 assert(!(CarryZero
&& CarryOne
) &&
498 "Carry can't be zero and one at the same time");
500 // The following check should be done by the caller, as it also indicates
501 // that LHS and RHS don't need to be computed.
503 // if (AOut.isMask())
506 // Boundary bits' carry out is unaffected by their carry in.
507 APInt Bound
= (LHS
.Zero
& RHS
.Zero
) | (LHS
.One
& RHS
.One
);
509 // First, the alive carry bits are determined from the alive output bits:
510 // Let demand ripple to the right but only up to any set bit in Bound.
513 // ACarry&~AOut = --111-
514 APInt RBound
= Bound
.reverseBits();
515 APInt RAOut
= AOut
.reverseBits();
516 APInt RProp
= RAOut
+ (RAOut
| ~RBound
);
517 APInt RACarry
= RProp
^ ~RBound
;
518 APInt ACarry
= RACarry
.reverseBits();
520 // Then, the alive input bits are determined from the alive carry bits:
521 APInt NeededToMaintainCarryZero
;
522 APInt NeededToMaintainCarryOne
;
523 if (OperandNo
== 0) {
524 NeededToMaintainCarryZero
= LHS
.Zero
| ~RHS
.Zero
;
525 NeededToMaintainCarryOne
= LHS
.One
| ~RHS
.One
;
527 NeededToMaintainCarryZero
= RHS
.Zero
| ~LHS
.Zero
;
528 NeededToMaintainCarryOne
= RHS
.One
| ~LHS
.One
;
531 // As in computeForAddCarry
532 APInt PossibleSumZero
= ~LHS
.Zero
+ ~RHS
.Zero
+ !CarryZero
;
533 APInt PossibleSumOne
= LHS
.One
+ RHS
.One
+ CarryOne
;
535 // The below is simplified from
537 // APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero);
538 // APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One;
539 // APInt CarryUnknown = ~(CarryKnownZero | CarryKnownOne);
541 // APInt NeededToMaintainCarry =
542 // (CarryKnownZero & NeededToMaintainCarryZero) |
543 // (CarryKnownOne & NeededToMaintainCarryOne) |
546 APInt NeededToMaintainCarry
= (~PossibleSumZero
| NeededToMaintainCarryZero
) &
547 (PossibleSumOne
| NeededToMaintainCarryOne
);
549 APInt AB
= AOut
| (ACarry
& NeededToMaintainCarry
);
553 APInt
DemandedBits::determineLiveOperandBitsAdd(unsigned OperandNo
,
555 const KnownBits
&LHS
,
556 const KnownBits
&RHS
) {
557 return determineLiveOperandBitsAddCarry(OperandNo
, AOut
, LHS
, RHS
, true,
561 APInt
DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo
,
563 const KnownBits
&LHS
,
564 const KnownBits
&RHS
) {
568 return determineLiveOperandBitsAddCarry(OperandNo
, AOut
, LHS
, NRHS
, false,
572 AnalysisKey
DemandedBitsAnalysis::Key
;
574 DemandedBits
DemandedBitsAnalysis::run(Function
&F
,
575 FunctionAnalysisManager
&AM
) {
576 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
577 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
578 return DemandedBits(F
, AC
, DT
);
581 PreservedAnalyses
DemandedBitsPrinterPass::run(Function
&F
,
582 FunctionAnalysisManager
&AM
) {
583 AM
.getResult
<DemandedBitsAnalysis
>(F
).print(OS
);
584 return PreservedAnalyses::all();