AMDGPU: Mark test as XFAIL in expensive_checks builds
[llvm-project.git] / llvm / lib / CodeGen / GlobalISel / GISelKnownBits.cpp
blob6c15ed3423d3bd6dc2f4e42970c950db7f2136f1
1 //===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// Provides analysis for querying information about KnownBits during GISel
10 /// passes.
12 //===----------------------------------------------------------------------===//
13 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/Analysis/ValueTracking.h"
16 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
17 #include "llvm/CodeGen/GlobalISel/Utils.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/CodeGen/TargetOpcodes.h"
22 #include "llvm/IR/ConstantRange.h"
23 #include "llvm/Target/TargetMachine.h"
25 #define DEBUG_TYPE "gisel-known-bits"
27 using namespace llvm;
29 char llvm::GISelKnownBitsAnalysis::ID = 0;
31 INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
32 "Analysis for ComputingKnownBits", false, true)
34 GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
35 : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
36 DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}
38 Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
39 const MachineInstr *MI = MRI.getVRegDef(R);
40 switch (MI->getOpcode()) {
41 case TargetOpcode::COPY:
42 return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
43 case TargetOpcode::G_ASSERT_ALIGN: {
44 // TODO: Min with source
45 return Align(MI->getOperand(2).getImm());
47 case TargetOpcode::G_FRAME_INDEX: {
48 int FrameIdx = MI->getOperand(1).getIndex();
49 return MF.getFrameInfo().getObjectAlign(FrameIdx);
51 case TargetOpcode::G_INTRINSIC:
52 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
53 case TargetOpcode::G_INTRINSIC_CONVERGENT:
54 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
55 default:
56 return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
60 KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
61 assert(MI.getNumExplicitDefs() == 1 &&
62 "expected single return generic instruction");
63 return getKnownBits(MI.getOperand(0).getReg());
66 KnownBits GISelKnownBits::getKnownBits(Register R) {
67 const LLT Ty = MRI.getType(R);
68 // Since the number of lanes in a scalable vector is unknown at compile time,
69 // we track one bit which is implicitly broadcast to all lanes. This means
70 // that all lanes in a scalable vector are considered demanded.
71 APInt DemandedElts =
72 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
73 return getKnownBits(R, DemandedElts);
76 KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
77 unsigned Depth) {
78 // For now, we only maintain the cache during one request.
79 assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
81 KnownBits Known;
82 computeKnownBitsImpl(R, Known, DemandedElts, Depth);
83 ComputeKnownBitsCache.clear();
84 return Known;
87 bool GISelKnownBits::signBitIsZero(Register R) {
88 LLT Ty = MRI.getType(R);
89 unsigned BitWidth = Ty.getScalarSizeInBits();
90 return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
93 APInt GISelKnownBits::getKnownZeroes(Register R) {
94 return getKnownBits(R).Zero;
97 APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
99 LLVM_ATTRIBUTE_UNUSED static void
100 dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
101 dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
102 << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
103 << toString(Known.Zero | Known.One, 16, false) << "\n"
104 << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
105 << "\n"
106 << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
107 << "\n";
110 /// Compute known bits for the intersection of \p Src0 and \p Src1
111 void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
112 KnownBits &Known,
113 const APInt &DemandedElts,
114 unsigned Depth) {
115 // Test src1 first, since we canonicalize simpler expressions to the RHS.
116 computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
118 // If we don't know any bits, early out.
119 if (Known.isUnknown())
120 return;
122 KnownBits Known2;
123 computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
125 // Only known if known in both the LHS and RHS.
126 Known = Known.intersectWith(Known2);
129 // Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
130 // created using Width. Use this function when the inputs are KnownBits
131 // objects. TODO: Move this KnownBits.h if this is usable in more cases.
132 static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
133 const KnownBits &OffsetKnown,
134 const KnownBits &WidthKnown) {
135 KnownBits Mask(BitWidth);
136 Mask.Zero = APInt::getBitsSetFrom(
137 BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
138 Mask.One = APInt::getLowBitsSet(
139 BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
140 return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
143 void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
144 const APInt &DemandedElts,
145 unsigned Depth) {
146 MachineInstr &MI = *MRI.getVRegDef(R);
147 unsigned Opcode = MI.getOpcode();
148 LLT DstTy = MRI.getType(R);
150 // Handle the case where this is called on a register that does not have a
151 // type constraint. For example, it may be post-ISel or this target might not
152 // preserve the type when early-selecting instructions.
153 if (!DstTy.isValid()) {
154 Known = KnownBits();
155 return;
158 #ifndef NDEBUG
159 if (DstTy.isFixedVector()) {
160 assert(
161 DstTy.getNumElements() == DemandedElts.getBitWidth() &&
162 "DemandedElt width should equal the fixed vector number of elements");
163 } else {
164 assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
165 "DemandedElt width should be 1 for scalars or scalable vectors");
167 #endif
169 unsigned BitWidth = DstTy.getScalarSizeInBits();
170 auto CacheEntry = ComputeKnownBitsCache.find(R);
171 if (CacheEntry != ComputeKnownBitsCache.end()) {
172 Known = CacheEntry->second;
173 LLVM_DEBUG(dbgs() << "Cache hit at ");
174 LLVM_DEBUG(dumpResult(MI, Known, Depth));
175 assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
176 return;
178 Known = KnownBits(BitWidth); // Don't know anything
180 // Depth may get bigger than max depth if it gets passed to a different
181 // GISelKnownBits object.
182 // This may happen when say a generic part uses a GISelKnownBits object
183 // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
184 // which creates a new GISelKnownBits object with a different and smaller
185 // depth. If we just check for equality, we would never exit if the depth
186 // that is passed down to the target specific GISelKnownBits object is
187 // already bigger than its max depth.
188 if (Depth >= getMaxDepth())
189 return;
191 if (!DemandedElts)
192 return; // No demanded elts, better to assume we don't know anything.
194 KnownBits Known2;
196 switch (Opcode) {
197 default:
198 TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
199 Depth);
200 break;
201 case TargetOpcode::G_BUILD_VECTOR: {
202 // Collect the known bits that are shared by every demanded vector element.
203 Known.Zero.setAllBits(); Known.One.setAllBits();
204 for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
205 if (!DemandedElts[i])
206 continue;
208 computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, APInt(1, 1),
209 Depth + 1);
211 // Known bits are the values that are shared by every demanded element.
212 Known = Known.intersectWith(Known2);
214 // If we don't know any bits, early out.
215 if (Known.isUnknown())
216 break;
218 break;
220 case TargetOpcode::COPY:
221 case TargetOpcode::G_PHI:
222 case TargetOpcode::PHI: {
223 Known.One = APInt::getAllOnes(BitWidth);
224 Known.Zero = APInt::getAllOnes(BitWidth);
225 // Destination registers should not have subregisters at this
226 // point of the pipeline, otherwise the main live-range will be
227 // defined more than once, which is against SSA.
228 assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
229 // Record in the cache that we know nothing for MI.
230 // This will get updated later and in the meantime, if we reach that
231 // phi again, because of a loop, we will cut the search thanks to this
232 // cache entry.
233 // We could actually build up more information on the phi by not cutting
234 // the search, but that additional information is more a side effect
235 // than an intended choice.
236 // Therefore, for now, save on compile time until we derive a proper way
237 // to derive known bits for PHIs within loops.
238 ComputeKnownBitsCache[R] = KnownBits(BitWidth);
239 // PHI's operand are a mix of registers and basic blocks interleaved.
240 // We only care about the register ones.
241 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
242 const MachineOperand &Src = MI.getOperand(Idx);
243 Register SrcReg = Src.getReg();
244 // Look through trivial copies and phis but don't look through trivial
245 // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
246 // analysis is currently unable to determine the bit width of a
247 // register class.
249 // We can't use NoSubRegister by name as it's defined by each target but
250 // it's always defined to be 0 by tablegen.
251 if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
252 MRI.getType(SrcReg).isValid()) {
253 // For COPYs we don't do anything, don't increase the depth.
254 computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
255 Depth + (Opcode != TargetOpcode::COPY));
256 Known2 = Known2.anyextOrTrunc(BitWidth);
257 Known = Known.intersectWith(Known2);
258 // If we reach a point where we don't know anything
259 // just stop looking through the operands.
260 if (Known.isUnknown())
261 break;
262 } else {
263 // We know nothing.
264 Known = KnownBits(BitWidth);
265 break;
268 break;
270 case TargetOpcode::G_CONSTANT: {
271 Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
272 break;
274 case TargetOpcode::G_FRAME_INDEX: {
275 int FrameIdx = MI.getOperand(1).getIndex();
276 TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
277 break;
279 case TargetOpcode::G_SUB: {
280 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
281 Depth + 1);
282 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
283 Depth + 1);
284 Known = KnownBits::sub(Known, Known2);
285 break;
287 case TargetOpcode::G_XOR: {
288 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
289 Depth + 1);
290 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
291 Depth + 1);
293 Known ^= Known2;
294 break;
296 case TargetOpcode::G_PTR_ADD: {
297 if (DstTy.isVector())
298 break;
299 // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
300 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
301 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
302 break;
303 [[fallthrough]];
305 case TargetOpcode::G_ADD: {
306 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
307 Depth + 1);
308 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
309 Depth + 1);
310 Known = KnownBits::add(Known, Known2);
311 break;
313 case TargetOpcode::G_AND: {
314 // If either the LHS or the RHS are Zero, the result is zero.
315 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
316 Depth + 1);
317 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
318 Depth + 1);
320 Known &= Known2;
321 break;
323 case TargetOpcode::G_OR: {
324 // If either the LHS or the RHS are Zero, the result is zero.
325 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
326 Depth + 1);
327 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
328 Depth + 1);
330 Known |= Known2;
331 break;
333 case TargetOpcode::G_MUL: {
334 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
335 Depth + 1);
336 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
337 Depth + 1);
338 Known = KnownBits::mul(Known, Known2);
339 break;
341 case TargetOpcode::G_SELECT: {
342 computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
343 Known, DemandedElts, Depth + 1);
344 break;
346 case TargetOpcode::G_SMIN: {
347 // TODO: Handle clamp pattern with number of sign bits
348 KnownBits KnownRHS;
349 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
350 Depth + 1);
351 computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
352 Depth + 1);
353 Known = KnownBits::smin(Known, KnownRHS);
354 break;
356 case TargetOpcode::G_SMAX: {
357 // TODO: Handle clamp pattern with number of sign bits
358 KnownBits KnownRHS;
359 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
360 Depth + 1);
361 computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
362 Depth + 1);
363 Known = KnownBits::smax(Known, KnownRHS);
364 break;
366 case TargetOpcode::G_UMIN: {
367 KnownBits KnownRHS;
368 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
369 DemandedElts, Depth + 1);
370 computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
371 DemandedElts, Depth + 1);
372 Known = KnownBits::umin(Known, KnownRHS);
373 break;
375 case TargetOpcode::G_UMAX: {
376 KnownBits KnownRHS;
377 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
378 DemandedElts, Depth + 1);
379 computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
380 DemandedElts, Depth + 1);
381 Known = KnownBits::umax(Known, KnownRHS);
382 break;
384 case TargetOpcode::G_FCMP:
385 case TargetOpcode::G_ICMP: {
386 if (DstTy.isVector())
387 break;
388 if (TL.getBooleanContents(DstTy.isVector(),
389 Opcode == TargetOpcode::G_FCMP) ==
390 TargetLowering::ZeroOrOneBooleanContent &&
391 BitWidth > 1)
392 Known.Zero.setBitsFrom(1);
393 break;
395 case TargetOpcode::G_SEXT: {
396 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
397 Depth + 1);
398 // If the sign bit is known to be zero or one, then sext will extend
399 // it to the top bits, else it will just zext.
400 Known = Known.sext(BitWidth);
401 break;
403 case TargetOpcode::G_ASSERT_SEXT:
404 case TargetOpcode::G_SEXT_INREG: {
405 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
406 Depth + 1);
407 Known = Known.sextInReg(MI.getOperand(2).getImm());
408 break;
410 case TargetOpcode::G_ANYEXT: {
411 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
412 Depth + 1);
413 Known = Known.anyext(BitWidth);
414 break;
416 case TargetOpcode::G_LOAD: {
417 const MachineMemOperand *MMO = *MI.memoperands_begin();
418 KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
419 if (const MDNode *Ranges = MMO->getRanges())
420 computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
421 Known = KnownRange.anyext(Known.getBitWidth());
422 break;
424 case TargetOpcode::G_SEXTLOAD:
425 case TargetOpcode::G_ZEXTLOAD: {
426 if (DstTy.isVector())
427 break;
428 const MachineMemOperand *MMO = *MI.memoperands_begin();
429 KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
430 if (const MDNode *Ranges = MMO->getRanges())
431 computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
432 Known = Opcode == TargetOpcode::G_SEXTLOAD
433 ? KnownRange.sext(Known.getBitWidth())
434 : KnownRange.zext(Known.getBitWidth());
435 break;
437 case TargetOpcode::G_ASHR: {
438 KnownBits LHSKnown, RHSKnown;
439 computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
440 Depth + 1);
441 computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
442 Depth + 1);
443 Known = KnownBits::ashr(LHSKnown, RHSKnown);
444 break;
446 case TargetOpcode::G_LSHR: {
447 KnownBits LHSKnown, RHSKnown;
448 computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
449 Depth + 1);
450 computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
451 Depth + 1);
452 Known = KnownBits::lshr(LHSKnown, RHSKnown);
453 break;
455 case TargetOpcode::G_SHL: {
456 KnownBits LHSKnown, RHSKnown;
457 computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
458 Depth + 1);
459 computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
460 Depth + 1);
461 Known = KnownBits::shl(LHSKnown, RHSKnown);
462 break;
464 case TargetOpcode::G_INTTOPTR:
465 case TargetOpcode::G_PTRTOINT:
466 if (DstTy.isVector())
467 break;
468 // Fall through and handle them the same as zext/trunc.
469 [[fallthrough]];
470 case TargetOpcode::G_ASSERT_ZEXT:
471 case TargetOpcode::G_ZEXT:
472 case TargetOpcode::G_TRUNC: {
473 Register SrcReg = MI.getOperand(1).getReg();
474 LLT SrcTy = MRI.getType(SrcReg);
475 unsigned SrcBitWidth;
477 // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
478 if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
479 SrcBitWidth = MI.getOperand(2).getImm();
480 else {
481 SrcBitWidth = SrcTy.isPointer()
482 ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
483 : SrcTy.getSizeInBits();
485 assert(SrcBitWidth && "SrcBitWidth can't be zero");
486 Known = Known.zextOrTrunc(SrcBitWidth);
487 computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
488 Known = Known.zextOrTrunc(BitWidth);
489 if (BitWidth > SrcBitWidth)
490 Known.Zero.setBitsFrom(SrcBitWidth);
491 break;
493 case TargetOpcode::G_ASSERT_ALIGN: {
494 int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());
496 // TODO: Should use maximum with source
497 // If a node is guaranteed to be aligned, set low zero bits accordingly as
498 // well as clearing one bits.
499 Known.Zero.setLowBits(LogOfAlign);
500 Known.One.clearLowBits(LogOfAlign);
501 break;
503 case TargetOpcode::G_MERGE_VALUES: {
504 unsigned NumOps = MI.getNumOperands();
505 unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
507 for (unsigned I = 0; I != NumOps - 1; ++I) {
508 KnownBits SrcOpKnown;
509 computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
510 DemandedElts, Depth + 1);
511 Known.insertBits(SrcOpKnown, I * OpSize);
513 break;
515 case TargetOpcode::G_UNMERGE_VALUES: {
516 unsigned NumOps = MI.getNumOperands();
517 Register SrcReg = MI.getOperand(NumOps - 1).getReg();
518 LLT SrcTy = MRI.getType(SrcReg);
520 if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
521 return; // TODO: Handle vector->subelement unmerges
523 // Figure out the result operand index
524 unsigned DstIdx = 0;
525 for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
526 ++DstIdx)
529 APInt SubDemandedElts = DemandedElts;
530 if (SrcTy.isVector()) {
531 unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
532 SubDemandedElts =
533 DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
536 KnownBits SrcOpKnown;
537 computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);
539 if (SrcTy.isVector())
540 Known = std::move(SrcOpKnown);
541 else
542 Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
543 break;
545 case TargetOpcode::G_BSWAP: {
546 Register SrcReg = MI.getOperand(1).getReg();
547 computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
548 Known = Known.byteSwap();
549 break;
551 case TargetOpcode::G_BITREVERSE: {
552 Register SrcReg = MI.getOperand(1).getReg();
553 computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
554 Known = Known.reverseBits();
555 break;
557 case TargetOpcode::G_CTPOP: {
558 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
559 Depth + 1);
560 // We can bound the space the count needs. Also, bits known to be zero can't
561 // contribute to the population.
562 unsigned BitsPossiblySet = Known2.countMaxPopulation();
563 unsigned LowBits = llvm::bit_width(BitsPossiblySet);
564 Known.Zero.setBitsFrom(LowBits);
565 // TODO: we could bound Known.One using the lower bound on the number of
566 // bits which might be set provided by popcnt KnownOne2.
567 break;
569 case TargetOpcode::G_UBFX: {
570 KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
571 computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
572 Depth + 1);
573 computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
574 Depth + 1);
575 computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
576 Depth + 1);
577 Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
578 break;
580 case TargetOpcode::G_SBFX: {
581 KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
582 computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
583 Depth + 1);
584 computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
585 Depth + 1);
586 computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
587 Depth + 1);
588 Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
589 // Sign extend the extracted value using shift left and arithmetic shift
590 // right.
591 KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
592 KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
593 Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
594 break;
596 case TargetOpcode::G_UADDO:
597 case TargetOpcode::G_UADDE:
598 case TargetOpcode::G_SADDO:
599 case TargetOpcode::G_SADDE:
600 case TargetOpcode::G_USUBO:
601 case TargetOpcode::G_USUBE:
602 case TargetOpcode::G_SSUBO:
603 case TargetOpcode::G_SSUBE:
604 case TargetOpcode::G_UMULO:
605 case TargetOpcode::G_SMULO: {
606 if (MI.getOperand(1).getReg() == R) {
607 // If we know the result of a compare has the top bits zero, use this
608 // info.
609 if (TL.getBooleanContents(DstTy.isVector(), false) ==
610 TargetLowering::ZeroOrOneBooleanContent &&
611 BitWidth > 1)
612 Known.Zero.setBitsFrom(1);
614 break;
616 case TargetOpcode::G_CTLZ:
617 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
618 KnownBits SrcOpKnown;
619 computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
620 Depth + 1);
621 // If we have a known 1, its position is our upper bound.
622 unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
623 unsigned LowBits = llvm::bit_width(PossibleLZ);
624 Known.Zero.setBitsFrom(LowBits);
625 break;
629 LLVM_DEBUG(dumpResult(MI, Known, Depth));
631 // Update the cache.
632 ComputeKnownBitsCache[R] = Known;
635 /// Compute number of sign bits for the intersection of \p Src0 and \p Src1
636 unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
637 const APInt &DemandedElts,
638 unsigned Depth) {
639 // Test src1 first, since we canonicalize simpler expressions to the RHS.
640 unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
641 if (Src1SignBits == 1)
642 return 1;
643 return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
646 /// Compute the known number of sign bits with attached range metadata in the
647 /// memory operand. If this is an extending load, accounts for the behavior of
648 /// the high bits.
649 static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
650 unsigned TyBits) {
651 const MDNode *Ranges = Ld->getRanges();
652 if (!Ranges)
653 return 1;
655 ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
656 if (TyBits > CR.getBitWidth()) {
657 switch (Ld->getOpcode()) {
658 case TargetOpcode::G_SEXTLOAD:
659 CR = CR.signExtend(TyBits);
660 break;
661 case TargetOpcode::G_ZEXTLOAD:
662 CR = CR.zeroExtend(TyBits);
663 break;
664 default:
665 break;
669 return std::min(CR.getSignedMin().getNumSignBits(),
670 CR.getSignedMax().getNumSignBits());
673 unsigned GISelKnownBits::computeNumSignBits(Register R,
674 const APInt &DemandedElts,
675 unsigned Depth) {
676 MachineInstr &MI = *MRI.getVRegDef(R);
677 unsigned Opcode = MI.getOpcode();
679 if (Opcode == TargetOpcode::G_CONSTANT)
680 return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
682 if (Depth == getMaxDepth())
683 return 1;
685 if (!DemandedElts)
686 return 1; // No demanded elts, better to assume we don't know anything.
688 LLT DstTy = MRI.getType(R);
689 const unsigned TyBits = DstTy.getScalarSizeInBits();
691 // Handle the case where this is called on a register that does not have a
692 // type constraint. This is unlikely to occur except by looking through copies
693 // but it is possible for the initial register being queried to be in this
694 // state.
695 if (!DstTy.isValid())
696 return 1;
698 unsigned FirstAnswer = 1;
699 switch (Opcode) {
700 case TargetOpcode::COPY: {
701 MachineOperand &Src = MI.getOperand(1);
702 if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
703 MRI.getType(Src.getReg()).isValid()) {
704 // Don't increment Depth for this one since we didn't do any work.
705 return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
708 return 1;
710 case TargetOpcode::G_SEXT: {
711 Register Src = MI.getOperand(1).getReg();
712 LLT SrcTy = MRI.getType(Src);
713 unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
714 return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
716 case TargetOpcode::G_ASSERT_SEXT:
717 case TargetOpcode::G_SEXT_INREG: {
718 // Max of the input and what this extends.
719 Register Src = MI.getOperand(1).getReg();
720 unsigned SrcBits = MI.getOperand(2).getImm();
721 unsigned InRegBits = TyBits - SrcBits + 1;
722 return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
724 case TargetOpcode::G_LOAD: {
725 GLoad *Ld = cast<GLoad>(&MI);
726 if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
727 break;
729 return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
731 case TargetOpcode::G_SEXTLOAD: {
732 GSExtLoad *Ld = cast<GSExtLoad>(&MI);
734 // FIXME: We need an in-memory type representation.
735 if (DstTy.isVector())
736 return 1;
738 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
739 if (NumBits != 1)
740 return NumBits;
742 // e.g. i16->i32 = '17' bits known.
743 const MachineMemOperand *MMO = *MI.memoperands_begin();
744 return TyBits - MMO->getSizeInBits().getValue() + 1;
746 case TargetOpcode::G_ZEXTLOAD: {
747 GZExtLoad *Ld = cast<GZExtLoad>(&MI);
749 // FIXME: We need an in-memory type representation.
750 if (DstTy.isVector())
751 return 1;
753 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
754 if (NumBits != 1)
755 return NumBits;
757 // e.g. i16->i32 = '16' bits known.
758 const MachineMemOperand *MMO = *MI.memoperands_begin();
759 return TyBits - MMO->getSizeInBits().getValue();
761 case TargetOpcode::G_AND:
762 case TargetOpcode::G_OR:
763 case TargetOpcode::G_XOR: {
764 Register Src1 = MI.getOperand(1).getReg();
765 unsigned Src1NumSignBits =
766 computeNumSignBits(Src1, DemandedElts, Depth + 1);
767 if (Src1NumSignBits != 1) {
768 Register Src2 = MI.getOperand(2).getReg();
769 unsigned Src2NumSignBits =
770 computeNumSignBits(Src2, DemandedElts, Depth + 1);
771 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
773 break;
775 case TargetOpcode::G_TRUNC: {
776 Register Src = MI.getOperand(1).getReg();
777 LLT SrcTy = MRI.getType(Src);
779 // Check if the sign bits of source go down as far as the truncated value.
780 unsigned DstTyBits = DstTy.getScalarSizeInBits();
781 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
782 unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
783 if (NumSrcSignBits > (NumSrcBits - DstTyBits))
784 return NumSrcSignBits - (NumSrcBits - DstTyBits);
785 break;
787 case TargetOpcode::G_SELECT: {
788 return computeNumSignBitsMin(MI.getOperand(2).getReg(),
789 MI.getOperand(3).getReg(), DemandedElts,
790 Depth + 1);
792 case TargetOpcode::G_SMIN:
793 case TargetOpcode::G_SMAX:
794 case TargetOpcode::G_UMIN:
795 case TargetOpcode::G_UMAX:
796 // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
797 return computeNumSignBitsMin(MI.getOperand(1).getReg(),
798 MI.getOperand(2).getReg(), DemandedElts,
799 Depth + 1);
800 case TargetOpcode::G_SADDO:
801 case TargetOpcode::G_SADDE:
802 case TargetOpcode::G_UADDO:
803 case TargetOpcode::G_UADDE:
804 case TargetOpcode::G_SSUBO:
805 case TargetOpcode::G_SSUBE:
806 case TargetOpcode::G_USUBO:
807 case TargetOpcode::G_USUBE:
808 case TargetOpcode::G_SMULO:
809 case TargetOpcode::G_UMULO: {
810 // If compares returns 0/-1, all bits are sign bits.
811 // We know that we have an integer-based boolean since these operations
812 // are only available for integer.
813 if (MI.getOperand(1).getReg() == R) {
814 if (TL.getBooleanContents(DstTy.isVector(), false) ==
815 TargetLowering::ZeroOrNegativeOneBooleanContent)
816 return TyBits;
819 break;
821 case TargetOpcode::G_FCMP:
822 case TargetOpcode::G_ICMP: {
823 bool IsFP = Opcode == TargetOpcode::G_FCMP;
824 if (TyBits == 1)
825 break;
826 auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
827 if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
828 return TyBits; // All bits are sign bits.
829 if (BC == TargetLowering::ZeroOrOneBooleanContent)
830 return TyBits - 1; // Every always-zero bit is a sign bit.
831 break;
833 case TargetOpcode::G_INTRINSIC:
834 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
835 case TargetOpcode::G_INTRINSIC_CONVERGENT:
836 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
837 default: {
838 unsigned NumBits =
839 TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
840 if (NumBits > 1)
841 FirstAnswer = std::max(FirstAnswer, NumBits);
842 break;
846 // Finally, if we can prove that the top bits of the result are 0's or 1's,
847 // use this information.
848 KnownBits Known = getKnownBits(R, DemandedElts, Depth);
849 APInt Mask;
850 if (Known.isNonNegative()) { // sign bit is 0
851 Mask = Known.Zero;
852 } else if (Known.isNegative()) { // sign bit is 1;
853 Mask = Known.One;
854 } else {
855 // Nothing known.
856 return FirstAnswer;
859 // Okay, we know that the sign bit in Mask is set. Use CLO to determine
860 // the number of identical bits in the top of the input value.
861 Mask <<= Mask.getBitWidth() - TyBits;
862 return std::max(FirstAnswer, Mask.countl_one());
865 unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
866 LLT Ty = MRI.getType(R);
867 APInt DemandedElts =
868 Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
869 return computeNumSignBits(R, DemandedElts, Depth);
872 void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
873 AU.setPreservesAll();
874 MachineFunctionPass::getAnalysisUsage(AU);
877 bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
878 return false;
881 GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
882 if (!Info) {
883 unsigned MaxDepth =
884 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
885 Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
887 return *Info;