//===-- ConstantFolding.cpp - Fold instructions into constants -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines routines for folding instructions into constants.
//
// Also, to supplement the basic IR ConstantExpr simplifications,
// this file defines some additional folding routines that can make use of
// DataLayout information. These functions cannot go in IR due to library
// dependency issues.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
#include <cstddef>
#include <cstdint>

using namespace llvm;

namespace {

Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
                                  ArrayRef<Constant *> Ops,
                                  const DataLayout &DL,
                                  const TargetLibraryInfo *TLI,
                                  bool ForLoadOperand);

//===----------------------------------------------------------------------===//
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
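
/// Descriptive note: OR the integer (or bitcast floating-point) elements of
/// the vector constant C, whose element type is SrcEltTy, into Result,
/// respecting the target's endianness. Returns a plain bitcast constant
/// expression if an element cannot be handled, or nullptr on success.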
static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
                                        Constant *C, Type *SrcEltTy,
                                        unsigned NumSrcElts,
                                        const DataLayout &DL) {
  // Now that we know that the input value is a vector of integers, just shift
  // and insert them into our result.
  unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
  for (unsigned i = 0; i != NumSrcElts; ++i) {
    Constant *Element;
    if (DL.isLittleEndian())
      Element = C->getAggregateElement(NumSrcElts - i - 1);
    else
      Element = C->getAggregateElement(i);

    if (Element && isa<UndefValue>(Element)) {
      Result <<= BitShift;
      continue;
    }

    auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
    if (!ElementCI)
      return ConstantExpr::getBitCast(C, DestTy);

    Result <<= BitShift;
    Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth());
  }

  return nullptr;
}

/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
  assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
         "Invalid constantexpr bitcast!");

  // Catch the obvious splat cases.
  if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy())
    return Constant::getNullValue(DestTy);
  if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy() &&
      !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
    return Constant::getAllOnesValue(DestTy);

  if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
    // Handle a vector->scalar integer/fp cast.
    if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
      unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
      Type *SrcEltTy = VTy->getElementType();

      // If the vector is a vector of floating point, convert it to a vector of
      // integers to simplify things.
      if (SrcEltTy->isFloatingPointTy()) {
        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
        auto *SrcIVTy = FixedVectorType::get(
            IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
        // Ask IR to do the conversion now that #elts line up.
        C = ConstantExpr::getBitCast(C, SrcIVTy);
      }

      APInt Result(DL.getTypeSizeInBits(DestTy), 0);
      if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
                                                SrcEltTy, NumSrcElts, DL))
        return CE;

      if (isa<IntegerType>(DestTy))
        return ConstantInt::get(DestTy, Result);

      APFloat FP(DestTy->getFltSemantics(), Result);
      return ConstantFP::get(DestTy->getContext(), FP);
    }
  }

  // The code below only handles casts to vectors currently.
  auto *DestVTy = dyn_cast<VectorType>(DestTy);
  if (!DestVTy)
    return ConstantExpr::getBitCast(C, DestTy);

  // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
  // vector so the code below can handle it uniformly.
  if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
    Constant *Ops = C; // don't take the address of C!
    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
  }

  // If this is a bitcast from constant vector -> vector, fold it.
  if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
    return ConstantExpr::getBitCast(C, DestTy);

  // If the element types match, IR can fold it.
  unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
  unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
  if (NumDstElt == NumSrcElt)
    return ConstantExpr::getBitCast(C, DestTy);

  Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
  Type *DstEltTy = DestVTy->getElementType();

  // Otherwise, we're changing the number of elements in a vector, which
  // requires endianness information to do the right thing.  For example,
  //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  // folds to (little endian):
  //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  // and to (big endian):
  //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>

  // First thing is first.  We only want to think about integer here, so if
  // we have something in FP form, recast it as integer.
  if (DstEltTy->isFloatingPointTy()) {
    // Fold to a vector of integers with the same size as our FP type.
    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
    auto *DestIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumDstElt);
    // Recursively handle this integer conversion, if possible.
    C = FoldBitCast(C, DestIVTy, DL);

    // Finally, IR can handle this now that #elts line up.
    return ConstantExpr::getBitCast(C, DestTy);
  }

  // Okay, we know the destination is integer, if the input is FP, convert
  // it to integer first.
  if (SrcEltTy->isFloatingPointTy()) {
    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
    auto *SrcIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
    // Ask IR to do the conversion now that #elts line up.
    C = ConstantExpr::getBitCast(C, SrcIVTy);
    // If IR wasn't able to fold it, bail out.
    if (!isa<ConstantVector>(C) &&  // FIXME: Remove ConstantVector.
        !isa<ConstantDataVector>(C))
      return ConstantExpr::getBitCast(C, DestTy);
  }

  // Now we know that the input and output vectors are both integer vectors
  // of the same size, and that their #elements is not the same.  Do the
  // conversion here, which depends on whether the input or output has
  // more elements.
  bool isLittleEndian = DL.isLittleEndian();

  SmallVector<Constant*, 32> Result;
  if (NumDstElt < NumSrcElt) {
    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
    Constant *Zero = Constant::getNullValue(DstEltTy);
    unsigned Ratio = NumSrcElt/NumDstElt;
    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
    unsigned SrcElt = 0;
    for (unsigned i = 0; i != NumDstElt; ++i) {
      // Build each element of the result.
      Constant *Elt = Zero;
      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
      for (unsigned j = 0; j != Ratio; ++j) {
        Constant *Src = C->getAggregateElement(SrcElt++);
        if (Src && isa<UndefValue>(Src))
          Src = Constant::getNullValue(
              cast<VectorType>(C->getType())->getElementType());
        else
          Src = dyn_cast_or_null<ConstantInt>(Src);
        if (!Src)  // Reject constantexpr elements.
          return ConstantExpr::getBitCast(C, DestTy);

        // Zero extend the element to the right size.
        Src = ConstantExpr::getZExt(Src, Elt->getType());

        // Shift it to the right place, depending on endianness.
        Src = ConstantExpr::getShl(Src,
                                   ConstantInt::get(Src->getType(), ShiftAmt));
        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;

        // Mix it in.
        Elt = ConstantExpr::getOr(Elt, Src);
      }
      Result.push_back(Elt);
    }
    return ConstantVector::get(Result);
  }

  // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  unsigned Ratio = NumDstElt/NumSrcElt;
  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);

  // Loop over each source value, expanding into multiple results.
  for (unsigned i = 0; i != NumSrcElt; ++i) {
    auto *Element = C->getAggregateElement(i);

    if (!Element) // Reject constantexpr elements.
      return ConstantExpr::getBitCast(C, DestTy);

    if (isa<UndefValue>(Element)) {
      // Correctly propagate undef values.
      Result.append(Ratio, UndefValue::get(DstEltTy));
      continue;
    }

    auto *Src = dyn_cast<ConstantInt>(Element);
    if (!Src)
      return ConstantExpr::getBitCast(C, DestTy);

    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
    for (unsigned j = 0; j != Ratio; ++j) {
      // Shift the piece of the value into the right place, depending on
      // endianness.
      Constant *Elt = ConstantExpr::getLShr(Src,
                                  ConstantInt::get(Src->getType(), ShiftAmt));
      ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;

      // Truncate the element to an integer with the same pointer size and
      // convert the element back to a pointer using an inttoptr.
      if (DstEltTy->isPointerTy()) {
        IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
        Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
        Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
        continue;
      }

      // Truncate and remember this piece.
      Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
    }
  }

  return ConstantVector::get(Result);
}

} // end anonymous namespace

/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
                                      APInt &Offset, const DataLayout &DL,
                                      DSOLocalEquivalent **DSOEquiv) {
  if (DSOEquiv)
    *DSOEquiv = nullptr;

  // Trivial case, constant is the global.
  if ((GV = dyn_cast<GlobalValue>(C))) {
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
    if (DSOEquiv)
      *DSOEquiv = FoundDSOEquiv;
    GV = FoundDSOEquiv->getGlobalValue();
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  // Otherwise, if this isn't a constant expr, bail out.
  auto *CE = dyn_cast<ConstantExpr>(C);
  if (!CE) return false;

  // Look through ptr->int and ptr->ptr casts.
  if (CE->getOpcode() == Instruction::PtrToInt ||
      CE->getOpcode() == Instruction::BitCast)
    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
                                      DSOEquiv);

  // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
  auto *GEP = dyn_cast<GEPOperator>(CE);
  if (!GEP)
    return false;

  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt TmpOffset(BitWidth, 0);

  // If the base isn't a global+constant, we aren't either.
  if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
                                  DSOEquiv))
    return false;

  // Otherwise, add any offset that our operands provide.
  if (!GEP->accumulateConstantOffset(DL, TmpOffset))
    return false;

  Offset = TmpOffset;
  return true;
}

Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
                                               const DataLayout &DL) {
  do {
    Type *SrcTy = C->getType();
    uint64_t DestSize = DL.getTypeSizeInBits(DestTy);
    uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy);
    if (SrcSize < DestSize)
      return nullptr;

    // Catch the obvious splat cases (since all-zeros can coerce non-integral
    // pointers legally).
    if (C->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy())
      return Constant::getNullValue(DestTy);
    if (C->isAllOnesValue() &&
        (DestTy->isIntegerTy() || DestTy->isFloatingPointTy() ||
         DestTy->isVectorTy()) &&
        !DestTy->isX86_AMXTy() && !DestTy->isX86_MMXTy() &&
        !DestTy->isPtrOrPtrVectorTy())
      // Get ones when the input is trivial, but
      // only for supported types inside getAllOnesValue.
      return Constant::getAllOnesValue(DestTy);

    // If the type sizes are the same and a cast is legal, just directly
    // cast the constant.
    // But be careful not to coerce non-integral pointers illegally.
    if (SrcSize == DestSize &&
        DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
            DL.isNonIntegralPointerType(DestTy->getScalarType())) {
      Instruction::CastOps Cast = Instruction::BitCast;
      // If we are going from a pointer to int or vice versa, we spell the cast
      // differently.
      if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
        Cast = Instruction::IntToPtr;
      else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
        Cast = Instruction::PtrToInt;

      if (CastInst::castIsValid(Cast, C, DestTy))
        return ConstantExpr::getCast(Cast, C, DestTy);
    }

    // If this isn't an aggregate type, there is nothing we can do to drill down
    // and find a bitcastable constant.
    if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
      return nullptr;

    // We're simulating a load through a pointer that was bitcast to point to
    // a different type, so we can try to walk down through the initial
    // elements of an aggregate to see if some part of the aggregate is
    // castable to implement the "load" semantic model.
    if (SrcTy->isStructTy()) {
      // Struct types might have leading zero-length elements like [0 x i32],
      // which are certainly not what we are looking for, so skip them.
      unsigned Elem = 0;
      Constant *ElemC;
      do {
        ElemC = C->getAggregateElement(Elem++);
      } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
      C = ElemC;
    } else {
      C = C->getAggregateElement(0u);
    }
  } while (C);

  return nullptr;
}

namespace {

/// Recursive helper to read bits out of global.  C is the constant being copied
/// out of.  ByteOffset is an offset into C.  CurPtr is the pointer to copy
/// results into and BytesLeft is the number of bytes left in
/// the CurPtr buffer.  DL is the DataLayout.
bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
                        unsigned BytesLeft, const DataLayout &DL) {
  assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
         "Out of range access");

  // If this element is zero or undefined, we can just return since *CurPtr is
  // zero initialized.
  if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
    return true;

  if (auto *CI = dyn_cast<ConstantInt>(C)) {
    if (CI->getBitWidth() > 64 ||
        (CI->getBitWidth() & 7) != 0)
      return false;

    uint64_t Val = CI->getZExtValue();
    unsigned IntBytes = unsigned(CI->getBitWidth()/8);
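
    // Copy the integer out one byte at a time; for big-endian targets the
    // byte at the lowest offset is the most significant one.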
    for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
      unsigned n = ByteOffset;
      if (!DL.isLittleEndian())
        n = IntBytes - n - 1;
      CurPtr[i] = (unsigned char)(Val >> (n * 8));
      ++ByteOffset;
    }
    return true;
  }

  if (auto *CFP = dyn_cast<ConstantFP>(C)) {
    if (CFP->getType()->isDoubleTy()) {
      C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isFloatTy()){
      C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isHalfTy()){
      C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    return false;
  }

  if (auto *CS = dyn_cast<ConstantStruct>(C)) {
    const StructLayout *SL = DL.getStructLayout(CS->getType());
    unsigned Index = SL->getElementContainingOffset(ByteOffset);
    uint64_t CurEltOffset = SL->getElementOffset(Index);
    ByteOffset -= CurEltOffset;

    while (true) {
      // If the element access is to the element itself and not to tail padding,
      // read the bytes from the element.
      uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());

      if (ByteOffset < EltSize &&
          !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
                              BytesLeft, DL))
        return false;

      ++Index;

      // Check to see if we read from the last struct element, if so we're done.
      if (Index == CS->getType()->getNumElements())
        return true;

      // If we read all of the bytes we needed from this element we're done.
      uint64_t NextEltOffset = SL->getElementOffset(Index);

      if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
        return true;

      // Move to the next element of the struct.
      CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
      BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
      ByteOffset = 0;
      CurEltOffset = NextEltOffset;
    }
    // not reached.
  }

  if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
      isa<ConstantDataSequential>(C)) {
    uint64_t NumElts;
    Type *EltTy;
    if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
      NumElts = AT->getNumElements();
      EltTy = AT->getElementType();
    } else {
      NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
      EltTy = cast<FixedVectorType>(C->getType())->getElementType();
    }
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    uint64_t Index = ByteOffset / EltSize;
    uint64_t Offset = ByteOffset - Index * EltSize;

    for (; Index != NumElts; ++Index) {
      if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
                              BytesLeft, DL))
        return false;

      uint64_t BytesWritten = EltSize - Offset;
      assert(BytesWritten <= EltSize && "Not indexing into this element?");
      if (BytesWritten >= BytesLeft)
        return true;

      Offset = 0;
      BytesLeft -= BytesWritten;
      CurPtr += BytesWritten;
    }
    return true;
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (CE->getOpcode() == Instruction::IntToPtr &&
        CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
      return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
                                BytesLeft, DL);
    }
  }

  // Otherwise, unknown initializer type.
  return false;
}

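/// Descriptive note: fold a load of LoadTy from the constant pointer C by
/// reinterpreting the raw bytes of the underlying constant initializer (via
/// ReadDataFromGlobal above). Returns nullptr if the bytes cannot be
/// recovered.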
Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
                                          const DataLayout &DL) {
  // Bail out early. We do not expect to load from scalable global variables.
  if (isa<ScalableVectorType>(LoadTy))
    return nullptr;

  auto *PTy = cast<PointerType>(C->getType());
  auto *IntType = dyn_cast<IntegerType>(LoadTy);

  // If this isn't an integer load we can't fold it directly.
  if (!IntType) {
    unsigned AS = PTy->getAddressSpace();

    // If this is a float/double load, we can try folding it as an int32/64 load
    // and then bitcast the result.  This can be useful for union cases.  Note
    // that address spaces don't matter here since we're not going to result in
    // an actual new load.
    Type *MapTy;
    if (LoadTy->isHalfTy())
      MapTy = Type::getInt16Ty(C->getContext());
    else if (LoadTy->isFloatTy())
      MapTy = Type::getInt32Ty(C->getContext());
    else if (LoadTy->isDoubleTy())
      MapTy = Type::getInt64Ty(C->getContext());
    else if (LoadTy->isVectorTy()) {
      MapTy = PointerType::getIntNTy(
          C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize());
    } else
      return nullptr;

    C = FoldBitCast(C, MapTy->getPointerTo(AS), DL);
    if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) {
      if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
          !LoadTy->isX86_AMXTy())
        // Materializing a zero can be done trivially without a bitcast
        return Constant::getNullValue(LoadTy);
      Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
      Res = FoldBitCast(Res, CastTy, DL);
      if (LoadTy->isPtrOrPtrVectorTy()) {
        // For a vector of pointers, we first convert to a vector of integers,
        // then do a vector inttoptr.
        if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
            !LoadTy->isX86_AMXTy())
          return Constant::getNullValue(LoadTy);
        if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
          // Be careful not to replace a load of an addrspace value with an inttoptr here
          return nullptr;
        Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy);
      }
      return Res;
    }
    return nullptr;
  }

  unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
  if (BytesLoaded > 32 || BytesLoaded == 0)
    return nullptr;

  GlobalValue *GVal;
  APInt OffsetAI;
  if (!IsConstantOffsetFromGlobal(C, GVal, OffsetAI, DL))
    return nullptr;

  auto *GV = dyn_cast<GlobalVariable>(GVal);
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
      !GV->getInitializer()->getType()->isSized())
    return nullptr;

  int64_t Offset = OffsetAI.getSExtValue();
  int64_t InitializerSize =
      DL.getTypeAllocSize(GV->getInitializer()->getType()).getFixedSize();

  // If we're not accessing anything in this constant, the result is undefined.
  if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
    return UndefValue::get(IntType);

  // If we're reading entirely past the end of the initializer, the result is
  // also undefined.
  if (Offset >= InitializerSize)
    return UndefValue::get(IntType);

  unsigned char RawBytes[32] = {0};
  unsigned char *CurPtr = RawBytes;
  unsigned BytesLeft = BytesLoaded;

  // If we're loading off the beginning of the global, some bytes may be valid.
  if (Offset < 0) {
    CurPtr += -Offset;
    BytesLeft += Offset;
    Offset = 0;
  }

  if (!ReadDataFromGlobal(GV->getInitializer(), Offset, CurPtr, BytesLeft, DL))
    return nullptr;
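
  // Assemble the bytes we read into an integer. On a little-endian target the
  // byte at the highest offset is the most significant, so walk RawBytes
  // backwards; on a big-endian target walk forwards.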
  APInt ResultVal = APInt(IntType->getBitWidth(), 0);
  if (DL.isLittleEndian()) {
    ResultVal = RawBytes[BytesLoaded - 1];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[BytesLoaded - 1 - i];
    }
  } else {
    ResultVal = RawBytes[0];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[i];
    }
  }

  return ConstantInt::get(IntType->getContext(), ResultVal);
}

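/// Descriptive note: fold a load through a bitcast constant expression by
/// loading from the original (pre-bitcast) pointer operand with the
/// destination type.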
Constant *ConstantFoldLoadThroughBitcastExpr(ConstantExpr *CE, Type *DestTy,
                                             const DataLayout &DL) {
  auto *SrcPtr = CE->getOperand(0);
  if (!SrcPtr->getType()->isPointerTy())
    return nullptr;

  return ConstantFoldLoadFromConstPtr(SrcPtr, DestTy, DL);
}

} // end anonymous namespace

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             const DataLayout &DL) {
  // First, try the easy cases:
  if (auto *GV = dyn_cast<GlobalVariable>(C))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      return ConstantFoldLoadThroughBitcast(GV->getInitializer(), Ty, DL);

  if (auto *GA = dyn_cast<GlobalAlias>(C))
    if (GA->getAliasee() && !GA->isInterposable())
      return ConstantFoldLoadFromConstPtr(GA->getAliasee(), Ty, DL);

  // If the loaded value isn't a constant expr, we can't handle it.
  auto *CE = dyn_cast<ConstantExpr>(C);
  if (!CE)
    return nullptr;

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    if (auto *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
      if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
        if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(
                GV->getInitializer(), CE, Ty, DL))
          return V;
      }
    } else {
      // Try to simplify GEP if the pointer operand wasn't a GlobalVariable.
      // SymbolicallyEvaluateGEP() with `ForLoadOperand = true` can potentially
      // simplify the GEP more than it normally would have been, but should only
      // be used for const folding loads.
      SmallVector<Constant *> Ops;
      for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I)
        Ops.push_back(cast<Constant>(CE->getOperand(I)));
      if (auto *Simplified = dyn_cast_or_null<ConstantExpr>(
              SymbolicallyEvaluateGEP(cast<GEPOperator>(CE), Ops, DL, nullptr,
                                      /*ForLoadOperand*/ true))) {
        // If the symbolically evaluated GEP is another GEP, we can only const
        // fold it if the resulting pointer operand is a GlobalValue. Otherwise
        // there is nothing else to simplify since the GEP is already in the
        // most simplified form.
        if (isa<GEPOperator>(Simplified)) {
          if (auto *GV = dyn_cast<GlobalVariable>(Simplified->getOperand(0))) {
            if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
              if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(
                      GV->getInitializer(), Simplified, Ty, DL))
                return V;
            }
          }
        } else {
          return ConstantFoldLoadFromConstPtr(Simplified, Ty, DL);
        }
      }
    }
  }

  if (CE->getOpcode() == Instruction::BitCast)
    if (Constant *LoadedC = ConstantFoldLoadThroughBitcastExpr(CE, Ty, DL))
      return LoadedC;

  // Instead of loading a constant C string, use the corresponding integer
  // value directly if the string length is small enough.
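  // For example, loading an i32 through a pointer to the little-endian
  // constant string "abc" (three characters plus the nul terminator) folds to
  // the immediate 0x00636261.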
  StringRef Str;
  if (getConstantStringInfo(CE, Str) && !Str.empty()) {
    size_t StrLen = Str.size();
    unsigned NumBits = Ty->getPrimitiveSizeInBits();
    // Replace the load with an immediate integer if the result is an integer
    // or fp value.
    if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
        (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
      APInt StrVal(NumBits, 0);
      APInt SingleChar(NumBits, 0);
      if (DL.isLittleEndian()) {
        for (unsigned char C : reverse(Str.bytes())) {
          SingleChar = static_cast<uint64_t>(C);
          StrVal = (StrVal << 8) | SingleChar;
        }
      } else {
        for (unsigned char C : Str.bytes()) {
          SingleChar = static_cast<uint64_t>(C);
          StrVal = (StrVal << 8) | SingleChar;
        }
        // Append NULL at the end.
        SingleChar = 0;
        StrVal = (StrVal << 8) | SingleChar;
      }

      Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
      if (Ty->isFloatingPointTy())
        Res = ConstantExpr::getBitCast(Res, Ty);
      return Res;
    }
  }

  // If this load comes from anywhere in a constant global, and if the global
  // is all undef or zero, we know what it loads.
  if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(CE))) {
    if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
      if (GV->getInitializer()->isNullValue())
        return Constant::getNullValue(Ty);
      if (isa<UndefValue>(GV->getInitializer()))
        return UndefValue::get(Ty);
    }
  }

  // Try hard to fold loads from bitcasted strange and non-type-safe things.
  return FoldReinterpretLoadFromConstPtr(CE, Ty, DL);
}

namespace {

/// One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together.  If target data info is available, it is provided as DL,
/// otherwise DL is null.
Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
                                    const DataLayout &DL) {
  // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
  // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
  // bits.

  if (Opc == Instruction::And) {
    KnownBits Known0 = computeKnownBits(Op0, DL);
    KnownBits Known1 = computeKnownBits(Op1, DL);
    if ((Known1.One | Known0.Zero).isAllOnesValue()) {
      // All the bits of Op0 that the 'and' could be masking are already zero.
      return Op0;
    }
    if ((Known0.One | Known1.Zero).isAllOnesValue()) {
      // All the bits of Op1 that the 'and' could be masking are already zero.
      return Op1;
    }

    Known0 &= Known1;
    if (Known0.isConstant())
      return ConstantInt::get(Op0->getType(), Known0.getConstant());
  }

  // If the constant expr is something like &A[123] - &A[4].f, fold this into a
  // constant.  This happens frequently when iterating over a global array.
  if (Opc == Instruction::Sub) {
    GlobalValue *GV1, *GV2;
    APInt Offs1, Offs2;

    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
        unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());

        // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
        // PtrToInt may change the bitwidth so we have to convert to the right
        // size first.
        return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
                                                Offs2.zextOrTrunc(OpSize));
      }
  }

  return nullptr;
}

/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
                         Type *ResultTy, Optional<unsigned> InRangeIndex,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
  Type *IntIdxTy = DL.getIndexType(ResultTy);
  Type *IntIdxScalarTy = IntIdxTy->getScalarType();

  bool Any = false;
  SmallVector<Constant *, 32> NewIdxs;
  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
    if ((i == 1 ||
         !isa<StructType>(GetElementPtrInst::getIndexedType(
             SrcElemTy, Ops.slice(1, i - 1)))) &&
        Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
      Any = true;
      Type *NewType = Ops[i]->getType()->isVectorTy()
                          ? IntIdxTy
                          : IntIdxScalarTy;
      NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
                                                                      true,
                                                                      NewType,
                                                                      true),
                                              Ops[i], NewType));
    } else
      NewIdxs.push_back(Ops[i]);
  }

  if (!Any)
    return nullptr;

  Constant *C = ConstantExpr::getGetElementPtr(
      SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
  return ConstantFoldConstant(C, DL, TLI);
}

/// Strip the pointer casts, but preserve the address space information.
Constant *StripPtrCastKeepAS(Constant *Ptr, bool ForLoadOperand) {
  assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
  auto *OldPtrTy = cast<PointerType>(Ptr->getType());
  Ptr = cast<Constant>(Ptr->stripPointerCasts());
  if (ForLoadOperand) {
    while (isa<GlobalAlias>(Ptr) && !cast<GlobalAlias>(Ptr)->isInterposable() &&
           !cast<GlobalAlias>(Ptr)->getBaseObject()->isInterposable()) {
      Ptr = cast<GlobalAlias>(Ptr)->getAliasee();
    }
  }

  auto *NewPtrTy = cast<PointerType>(Ptr->getType());

  // Preserve the address space number of the pointer.
  if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
    Ptr = ConstantExpr::getPointerCast(
        Ptr, PointerType::getWithSamePointeeType(NewPtrTy,
                                                 OldPtrTy->getAddressSpace()));
  }
  return Ptr;
}

/// If we can symbolically evaluate the GEP constant expression, do so.
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
                                  ArrayRef<Constant *> Ops,
                                  const DataLayout &DL,
                                  const TargetLibraryInfo *TLI,
                                  bool ForLoadOperand) {
  const GEPOperator *InnermostGEP = GEP;
  bool InBounds = GEP->isInBounds();

  Type *SrcElemTy = GEP->getSourceElementType();
  Type *ResElemTy = GEP->getResultElementType();
  Type *ResTy = GEP->getType();
  if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
    return nullptr;

  if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
                                   GEP->getInRangeIndex(), DL, TLI))
    return C;

  Constant *Ptr = Ops[0];
  if (!Ptr->getType()->isPointerTy())
    return nullptr;

  Type *IntIdxTy = DL.getIndexType(Ptr->getType());

  // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
  // "inttoptr (sub (ptrtoint Ptr), V)"
  if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
    auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
    assert((!CE || CE->getType() == IntIdxTy) &&
           "CastGEPIndices didn't canonicalize index types!");
    if (CE && CE->getOpcode() == Instruction::Sub &&
        CE->getOperand(0)->isNullValue()) {
      Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
      Res = ConstantExpr::getSub(Res, CE->getOperand(1));
      Res = ConstantExpr::getIntToPtr(Res, ResTy);
      return ConstantFoldConstant(Res, DL, TLI);
    }
  }

  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    if (!isa<ConstantInt>(Ops[i]))
      return nullptr;

  unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
  APInt Offset =
      APInt(BitWidth,
            DL.getIndexedOffsetInType(
                SrcElemTy,
                makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
  Ptr = StripPtrCastKeepAS(Ptr, ForLoadOperand);

  // If this is a GEP of a GEP, fold it all into a single GEP.
  while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
    InnermostGEP = GEP;
    InBounds &= GEP->isInBounds();

    SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());

    // Do not try to incorporate the sub-GEP if some index is not a number.
    bool AllConstantInt = true;
    for (Value *NestedOp : NestedOps)
      if (!isa<ConstantInt>(NestedOp)) {
        AllConstantInt = false;
        break;
      }
    if (!AllConstantInt)
      break;

    Ptr = cast<Constant>(GEP->getOperand(0));
    SrcElemTy = GEP->getSourceElementType();
    Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps));
    Ptr = StripPtrCastKeepAS(Ptr, ForLoadOperand);
  }

  // If the base value for this address is a literal integer value, fold the
  // getelementptr to the resulting integer value casted to the pointer type.
  APInt BasePtr(BitWidth, 0);
  if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
    if (CE->getOpcode() == Instruction::IntToPtr) {
      if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
        BasePtr = Base->getValue().zextOrTrunc(BitWidth);
    }
  }

  auto *PTy = cast<PointerType>(Ptr->getType());
  if ((Ptr->isNullValue() || BasePtr != 0) &&
      !DL.isNonIntegralPointerType(PTy)) {
    Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
    return ConstantExpr::getIntToPtr(C, ResTy);
  }

  // Otherwise form a regular getelementptr. Recompute the indices so that
  // we eliminate over-indexing of the notional static type array bounds.
  // This makes it easy to determine if the getelementptr is "inbounds".
  // Also, this helps GlobalOpt do SROA on GlobalVariables.
  SmallVector<Constant *, 32> NewIdxs;
  Type *Ty = PTy;
  SrcElemTy = PTy->getElementType();

  do {
    if (!Ty->isStructTy()) {
      if (Ty->isPointerTy()) {
        // The only pointer indexing we'll do is on the first index of the GEP.
        if (!NewIdxs.empty())
          break;

        Ty = SrcElemTy;

        // Only handle pointers to sized types, not pointers to functions.
        if (!Ty->isSized())
          return nullptr;
      } else {
        Type *NextTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0);
        if (!NextTy)
          break;
        Ty = NextTy;
      }

      // Determine which element of the array the offset points into.
      APInt ElemSize(BitWidth, DL.getTypeAllocSize(Ty));
      if (ElemSize == 0) {
        // The element size is 0. This may be [0 x Ty]*, so just use a zero
        // index for this level and proceed to the next level to see if it can
        // accommodate the offset.
        NewIdxs.push_back(ConstantInt::get(IntIdxTy, 0));
      } else {
        // The element size is non-zero, so divide the offset by the element
        // size (rounding down), to compute the index at this level.
        bool Overflow = false;
        APInt NewIdx = Offset.sdiv_ov(ElemSize, Overflow);
        if (Overflow)
          break;
        Offset -= NewIdx * ElemSize;
        NewIdxs.push_back(ConstantInt::get(IntIdxTy, NewIdx));
      }
    } else {
      auto *STy = cast<StructType>(Ty);
      // If we end up with an offset that isn't valid for this struct type, we
      // can't re-form this GEP in a regular form, so bail out. The pointer
      // operand likely went through casts that are necessary to make the GEP
      // sensible.
      const StructLayout &SL = *DL.getStructLayout(STy);
      if (Offset.isNegative() || Offset.uge(SL.getSizeInBytes()))
        break;

      // Determine which field of the struct the offset points into. The
      // getZExtValue is fine as we've already ensured that the offset is
      // within the range representable by the StructLayout API.
      unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
      NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
                                         ElIdx));
      Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
      Ty = STy->getTypeAtIndex(ElIdx);
    }
  } while (Ty != ResElemTy);

  // If we haven't used up the entire offset by descending the static
  // type, then the offset is pointing into the middle of an indivisible
  // member, so we can't simplify it.
  if (Offset != 0)
    return nullptr;

  // Preserve the inrange index from the innermost GEP if possible. We must
  // have calculated the same indices up to and including the inrange index.
  Optional<unsigned> InRangeIndex;
  if (Optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex())
    if (SrcElemTy == InnermostGEP->getSourceElementType() &&
        NewIdxs.size() > *LastIRIndex) {
      InRangeIndex = LastIRIndex;
      for (unsigned I = 0; I <= *LastIRIndex; ++I)
        if (NewIdxs[I] != InnermostGEP->getOperand(I + 1))
          return nullptr;
    }

  // Create a GEP.
  Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
                                               InBounds, InRangeIndex);
  assert(C->getType()->getPointerElementType() == Ty &&
         "Computed GetElementPtr has unexpected type!");

  // If we ended up indexing a member with a type that doesn't match
  // the type of what the original indices indexed, add a cast.
  if (C->getType() != ResTy)
    C = FoldBitCast(C, ResTy, DL);

  return C;
}

/// Attempt to constant fold an instruction with the
/// specified opcode and operands.  If successful, the constant result is
/// returned, if not, null is returned.  Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
                                       ArrayRef<Constant *> Ops,
                                       const DataLayout &DL,
                                       const TargetLibraryInfo *TLI) {
  Type *DestTy = InstOrCE->getType();

  if (Instruction::isUnaryOp(Opcode))
    return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);

  if (Instruction::isBinaryOp(Opcode))
    return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);

  if (Instruction::isCast(Opcode))
    return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);

  if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
    if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI,
                                              /*ForLoadOperand*/ false))
      return C;

    return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
                                          Ops.slice(1), GEP->isInBounds(),
                                          GEP->getInRangeIndex());
  }

  if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
    return CE->getWithOperands(Ops);

  switch (Opcode) {
  default: return nullptr;
  case Instruction::ICmp:
  case Instruction::FCmp: llvm_unreachable("Invalid for compares");
  case Instruction::Freeze:
    return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
  case Instruction::Call:
    if (auto *F = dyn_cast<Function>(Ops.back())) {
      const auto *Call = cast<CallBase>(InstOrCE);
      if (canConstantFoldCallTo(Call, F))
        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
    }
    return nullptr;
  case Instruction::Select:
    return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
  case Instruction::ExtractElement:
    return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
  case Instruction::ExtractValue:
    return ConstantExpr::getExtractValue(
        Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
  case Instruction::InsertElement:
    return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
  case Instruction::ShuffleVector:
    return ConstantExpr::getShuffleVector(
        Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
  }
}

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Constant Folding public APIs
//===----------------------------------------------------------------------===//

namespace {

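/// Descriptive note: recursively fold the operands of C, memoizing
/// already-folded operands in FoldedOps, and then fold C itself with the new
/// operands.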
Constant *
ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
                         const TargetLibraryInfo *TLI,
                         SmallDenseMap<Constant *, Constant *> &FoldedOps) {
  if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
    return const_cast<Constant *>(C);

  SmallVector<Constant *, 8> Ops;
  for (const Use &OldU : C->operands()) {
    Constant *OldC = cast<Constant>(&OldU);
    Constant *NewC = OldC;
    // Recursively fold the ConstantExpr's operands. If we have already folded
    // a ConstantExpr, we don't have to process it again.
    if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
      auto It = FoldedOps.find(OldC);
      if (It == FoldedOps.end()) {
        NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
        FoldedOps.insert({OldC, NewC});
      } else {
        NewC = It->second;
      }
    }
    Ops.push_back(NewC);
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (CE->isCompare())
      return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
                                             DL, TLI);

    return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
  }

  assert(isa<ConstantVector>(C));
  return ConstantVector::get(Ops);
}

} // end anonymous namespace

Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
                                        const TargetLibraryInfo *TLI) {
  // Handle PHI nodes quickly here...
  if (auto *PN = dyn_cast<PHINode>(I)) {
    Constant *CommonValue = nullptr;

    SmallDenseMap<Constant *, Constant *> FoldedOps;
    for (Value *Incoming : PN->incoming_values()) {
      // If the incoming value is undef then skip it.  Note that while we could
      // skip the value if it is equal to the phi node itself we choose not to
      // because that would break the rule that constant folding only applies if
      // all operands are constants.
      if (isa<UndefValue>(Incoming))
        continue;
      // If the incoming value is not a constant, then give up.
      auto *C = dyn_cast<Constant>(Incoming);
      if (!C)
        return nullptr;
      // Fold the PHI's operands.
      C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
      // If the incoming value is a different constant to
      // the one we saw previously, then give up.
      if (CommonValue && C != CommonValue)
        return nullptr;
      CommonValue = C;
    }

    // If we reach here, all incoming values are the same constant or undef.
    return CommonValue ? CommonValue : UndefValue::get(PN->getType());
  }

  // Scan the operand list, checking to see if they are all constants, if so,
  // hand off to ConstantFoldInstOperandsImpl.
  if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
    return nullptr;

  SmallDenseMap<Constant *, Constant *> FoldedOps;
  SmallVector<Constant *, 8> Ops;
  for (const Use &OpU : I->operands()) {
    auto *Op = cast<Constant>(&OpU);
    // Fold the Instruction's operands.
    Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
    Ops.push_back(Op);
  }

  if (const auto *CI = dyn_cast<CmpInst>(I))
    return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
                                           DL, TLI);

  if (const auto *LI = dyn_cast<LoadInst>(I)) {
    if (LI->isVolatile())
      return nullptr;
    return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
  }

  if (auto *IVI = dyn_cast<InsertValueInst>(I))
    return ConstantExpr::getInsertValue(Ops[0], Ops[1], IVI->getIndices());

  if (auto *EVI = dyn_cast<ExtractValueInst>(I))
    return ConstantExpr::getExtractValue(Ops[0], EVI->getIndices());

  return ConstantFoldInstOperands(I, Ops, DL, TLI);
}

Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
                                     const TargetLibraryInfo *TLI) {
  SmallDenseMap<Constant *, Constant *> FoldedOps;
  return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
}

Constant *llvm::ConstantFoldInstOperands(Instruction *I,
                                         ArrayRef<Constant *> Ops,
                                         const DataLayout &DL,
                                         const TargetLibraryInfo *TLI) {
  return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
}

Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                Constant *Ops0, Constant *Ops1,
                                                const DataLayout &DL,
                                                const TargetLibraryInfo *TLI) {
  // fold: icmp (inttoptr x), null         -> icmp x, 0
  // fold: icmp null, (inttoptr x)         -> icmp 0, x
  // fold: icmp (ptrtoint x), 0            -> icmp x, null
  // fold: icmp 0, (ptrtoint x)            -> icmp null, x
  // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
  // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
  //
  // FIXME: The following comment is out of date and the DataLayout is here now.
  // ConstantExpr::getCompare cannot do this, because it doesn't have DL
  // around to know if bit truncation is happening.
  if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
    if (Ops1->isNullValue()) {
      if (CE0->getOpcode() == Instruction::IntToPtr) {
        Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
        // Convert the integer value to the right size to ensure we get the
        // proper extension or truncation.
        Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
                                                   IntPtrTy, false);
        Constant *Null = Constant::getNullValue(C->getType());
        return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
      }

      // Only do this transformation if the int is intptrty in size, otherwise
      // there is a truncation or extension that we aren't modeling.
      if (CE0->getOpcode() == Instruction::PtrToInt) {
        Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
        if (CE0->getType() == IntPtrTy) {
          Constant *C = CE0->getOperand(0);
          Constant *Null = Constant::getNullValue(C->getType());
          return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
        }
      }
    }

    if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
      if (CE0->getOpcode() == CE1->getOpcode()) {
        if (CE0->getOpcode() == Instruction::IntToPtr) {
          Type *IntPtrTy = DL.getIntPtrType(CE0->getType());

          // Convert the integer value to the right size to ensure we get the
          // proper extension or truncation.
          Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
                                                      IntPtrTy, false);
          Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
                                                      IntPtrTy, false);
          return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
        }

        // Only do this transformation if the int is intptrty in size, otherwise
        // there is a truncation or extension that we aren't modeling.
        if (CE0->getOpcode() == Instruction::PtrToInt) {
          Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
          if (CE0->getType() == IntPtrTy &&
              CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
            return ConstantFoldCompareInstOperands(
                Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
          }
        }
      }
    }

    // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
    // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
    if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
        CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
      Constant *LHS = ConstantFoldCompareInstOperands(
          Predicate, CE0->getOperand(0), Ops1, DL, TLI);
      Constant *RHS = ConstantFoldCompareInstOperands(
          Predicate, CE0->getOperand(1), Ops1, DL, TLI);
      unsigned OpC =
          Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
      return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
    }
  } else if (isa<ConstantExpr>(Ops1)) {
    // If RHS is a constant expression, but the left side isn't, swap the
    // operands and try again.
    Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate);
    return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
  }

  return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}

Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
                                           const DataLayout &DL) {
  assert(Instruction::isUnaryOp(Opcode));

  return ConstantExpr::get(Opcode, Op);
}

Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
                                             Constant *RHS,
                                             const DataLayout &DL) {
  assert(Instruction::isBinaryOp(Opcode));
  if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
    if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
      return C;

  return ConstantExpr::get(Opcode, LHS, RHS);
}

Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
                                        Type *DestTy, const DataLayout &DL) {
  assert(Instruction::isCast(Opcode));
  switch (Opcode) {
  default:
    llvm_unreachable("Missing case");
  case Instruction::PtrToInt:
    // If the input is an inttoptr, eliminate the pair.  This requires knowing
    // the width of a pointer, so it can't be done in ConstantExpr::getCast.
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (CE->getOpcode() == Instruction::IntToPtr) {
        Constant *Input = CE->getOperand(0);
        unsigned InWidth = Input->getType()->getScalarSizeInBits();
        unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
        if (PtrWidth < InWidth) {
          Constant *Mask =
              ConstantInt::get(CE->getContext(),
                               APInt::getLowBitsSet(InWidth, PtrWidth));
          Input = ConstantExpr::getAnd(Input, Mask);
        }
        // Do a zext or trunc to get to the dest size.
        return ConstantExpr::getIntegerCast(Input, DestTy, false);
      }
    }
    return ConstantExpr::getCast(Opcode, C, DestTy);
  case Instruction::IntToPtr:
    // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
    // the int size is >= the ptr size and the address spaces are the same.
    // This requires knowing the width of a pointer, so it can't be done in
    // ConstantExpr::getCast.
    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (CE->getOpcode() == Instruction::PtrToInt) {
        Constant *SrcPtr = CE->getOperand(0);
        unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
        unsigned MidIntSize = CE->getType()->getScalarSizeInBits();

        if (MidIntSize >= SrcPtrSize) {
          unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
          if (SrcAS == DestTy->getPointerAddressSpace())
            return FoldBitCast(CE->getOperand(0), DestTy, DL);
        }
      }
    }

    return ConstantExpr::getCast(Opcode, C, DestTy);
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::AddrSpaceCast:
    return ConstantExpr::getCast(Opcode, C, DestTy);
  case Instruction::BitCast:
    return FoldBitCast(C, DestTy, DL);
  }
}

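/// Descriptive note: fold a load from the constant aggregate C addressed by
/// the getelementptr constant expression CE by walking the GEP's constant
/// indices into C.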
Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
                                                       ConstantExpr *CE,
                                                       Type *Ty,
                                                       const DataLayout &DL) {
  if (!CE->getOperand(1)->isNullValue())
    return nullptr;  // Do not allow stepping over the value!

  // Loop over all of the operands, tracking down which value we are
  // addressing.
  for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
    C = C->getAggregateElement(CE->getOperand(i));
    if (!C)
      return nullptr;
  }
  return ConstantFoldLoadThroughBitcast(C, Ty, DL);
}

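/// Descriptive note: drill into the constant aggregate C with the given
/// constant Indices and return the addressed element, or nullptr if an index
/// is out of range.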
Constant *
llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
                                        ArrayRef<Constant *> Indices) {
  // Loop over all of the operands, tracking down which value we are
  // addressing.
  for (Constant *Index : Indices) {
    C = C->getAggregateElement(Index);
    if (!C)
      return nullptr;
  }
  return C;
}

//===----------------------------------------------------------------------===//
// Constant Folding for Calls
//===----------------------------------------------------------------------===//

bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
  if (Call->isNoBuiltin())
    return false;
  switch (F->getIntrinsicID()) {
  // Operations that do not operate on floating-point numbers and do not depend
  // on the FP environment can be folded even in strictfp functions.
  case Intrinsic::bswap:
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::masked_load:
  case Intrinsic::get_active_lane_mask:
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::bitreverse:
  case Intrinsic::is_constant:
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  // Target intrinsics
  case Intrinsic::amdgcn_perm:
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  // WebAssembly float semantics are always known
  case Intrinsic::wasm_trunc_signed:
  case Intrinsic::wasm_trunc_unsigned:
    return true;

  // Floating point operations cannot be folded in strictfp functions in the
  // general case. They can be folded if the FP environment is known to the
  // compiler.
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::log:
  case Intrinsic::log2:
  case Intrinsic::log10:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::sqrt:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow:
  case Intrinsic::powi:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat:
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16:
  case Intrinsic::amdgcn_cos:
  case Intrinsic::amdgcn_cubeid:
  case Intrinsic::amdgcn_cubema:
  case Intrinsic::amdgcn_cubesc:
  case Intrinsic::amdgcn_cubetc:
  case Intrinsic::amdgcn_fmul_legacy:
  case Intrinsic::amdgcn_fma_legacy:
  case Intrinsic::amdgcn_fract:
  case Intrinsic::amdgcn_ldexp:
  case Intrinsic::amdgcn_sin:
  // The intrinsics below depend on rounding mode in MXCSR.
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64:
    return !Call->isStrictFP();

  // Sign operations are actually bitwise operations, they do not raise
  // exceptions even for SNANs. The same applies to classification functions.
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::isnan:
  // Non-constrained variants of rounding operations mean the default FP
  // environment, so they can be folded in any case.
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::trunc:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  // Constrained intrinsics can be folded if the FP environment is known
  // to the compiler.
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd:
  case Intrinsic::experimental_constrained_fadd:
  case Intrinsic::experimental_constrained_fsub:
  case Intrinsic::experimental_constrained_fmul:
  case Intrinsic::experimental_constrained_fdiv:
  case Intrinsic::experimental_constrained_frem:
  case Intrinsic::experimental_constrained_ceil:
  case Intrinsic::experimental_constrained_floor:
  case Intrinsic::experimental_constrained_round:
  case Intrinsic::experimental_constrained_roundeven:
  case Intrinsic::experimental_constrained_trunc:
  case Intrinsic::experimental_constrained_nearbyint:
  case Intrinsic::experimental_constrained_rint:
    return true;
  default:
    return false;
  case Intrinsic::not_intrinsic: break;
  }

  if (!F->hasName() || Call->isStrictFP())
    return false;

  // In these cases, the check of the length is required.  We don't want to
  // return true for a name like "cos\0blah" which strcmp would return equal to
  // "cos", but has length 8.
  StringRef Name = F->getName();

  switch (Name[0]) {
  default:
    return false;
  case 'a':
    return Name == "acos" || Name == "acosf" ||
           Name == "asin" || Name == "asinf" ||
           Name == "atan" || Name == "atanf" ||
           Name == "atan2" || Name == "atan2f";
  case 'c':
    return Name == "ceil" || Name == "ceilf" ||
           Name == "cos" || Name == "cosf" ||
           Name == "cosh" || Name == "coshf";
  case 'e':
    return Name == "exp" || Name == "expf" ||
           Name == "exp2" || Name == "exp2f";
  case 'f':
    return Name == "fabs" || Name == "fabsf" ||
           Name == "floor" || Name == "floorf" ||
           Name == "fmod" || Name == "fmodf";
  case 'l':
    return Name == "log" || Name == "logf" ||
           Name == "log2" || Name == "log2f" ||
           Name == "log10" || Name == "log10f";
  case 'n':
    return Name == "nearbyint" || Name == "nearbyintf";
  case 'p':
    return Name == "pow" || Name == "powf";
  case 'r':
    return Name == "remainder" || Name == "remainderf" ||
           Name == "rint" || Name == "rintf" ||
           Name == "round" || Name == "roundf";
  case 's':
    return Name == "sin" || Name == "sinf" ||
           Name == "sinh" || Name == "sinhf" ||
           Name == "sqrt" || Name == "sqrtf";
  case 't':
    return Name == "tan" || Name == "tanf" ||
           Name == "tanh" || Name == "tanhf" ||
           Name == "trunc" || Name == "truncf";
  case '_':
    // Check for various function names that get used for the math functions
    // when the header files are preprocessed with the macro
    // __FINITE_MATH_ONLY__ enabled.
    // The '12' here is the length of the shortest name that can match.
    // We need to check the size before looking at Name[1] and Name[2]
    // so we may as well check a limit that will eliminate mismatches.
    if (Name.size() < 12 || Name[1] != '_')
      return false;
    switch (Name[2]) {
    default:
      return false;
    case 'a':
      return Name == "__acos_finite" || Name == "__acosf_finite" ||
             Name == "__asin_finite" || Name == "__asinf_finite" ||
             Name == "__atan2_finite" || Name == "__atan2f_finite";
    case 'c':
      return Name == "__cosh_finite" || Name == "__coshf_finite";
    case 'e':
      return Name == "__exp_finite" || Name == "__expf_finite" ||
             Name == "__exp2_finite" || Name == "__exp2f_finite";
    case 'l':
      return Name == "__log_finite" || Name == "__logf_finite" ||
             Name == "__log10_finite" || Name == "__log10f_finite";
    case 'p':
      return Name == "__pow_finite" || Name == "__powf_finite";
    case 's':
      return Name == "__sinh_finite" || Name == "__sinhf_finite";
    }
  }
}

Constant *GetConstantFoldFPValue(double V, Type *Ty) {
  if (Ty->isHalfTy() || Ty->isFloatTy()) {
    APFloat APF(V);
    bool unused;
    APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
    return ConstantFP::get(Ty->getContext(), APF);
  }
  if (Ty->isDoubleTy())
    return ConstantFP::get(Ty->getContext(), APFloat(V));
  llvm_unreachable("Can only constant fold half/float/double");
}

/// Clear the floating-point exception state.
inline void llvm_fenv_clearexcept() {
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
  feclearexcept(FE_ALL_EXCEPT);
#endif
  errno = 0;
}

/// Test if a floating-point exception was raised.
inline bool llvm_fenv_testexcept() {
  int errno_val = errno;
  if (errno_val == ERANGE || errno_val == EDOM)
    return true;
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
  if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
    return true;
#endif
  return false;
}

Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
                         Type *Ty) {
  llvm_fenv_clearexcept();
  double Result = NativeFP(V.convertToDouble());
  if (llvm_fenv_testexcept()) {
    llvm_fenv_clearexcept();
    return nullptr;
  }

  return GetConstantFoldFPValue(Result, Ty);
}

Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
                               const APFloat &V, const APFloat &W, Type *Ty) {
  llvm_fenv_clearexcept();
  double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
  if (llvm_fenv_testexcept()) {
    llvm_fenv_clearexcept();
    return nullptr;
  }

  return GetConstantFoldFPValue(Result, Ty);
}
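
// Illustrative sketch of how the helpers above behave (example values, not
// taken from this file): ConstantFoldFP(cos, APFloat(0.0), FloatTy) calls the
// host cos(), sees that no errno/FP exception was raised, and returns a
// ConstantFP holding 1.0. If the host call had set ERANGE/EDOM or raised an
// FP exception, nullptr would be returned and the call left unfolded.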
Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
  FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
  if (!VT)
    return nullptr;

  // This isn't strictly necessary, but handle the special/common case of zero:
  // all integer reductions of a zero input produce zero.
  if (isa<ConstantAggregateZero>(Op))
    return ConstantInt::get(VT->getElementType(), 0);

  // This is the same as the underlying binops - poison propagates.
  if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
    return PoisonValue::get(VT->getElementType());

  // TODO: Handle undef.
  if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
    return nullptr;

  auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
  if (!EltC)
    return nullptr;

  APInt Acc = EltC->getValue();
  for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
    if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
      return nullptr;
    const APInt &X = EltC->getValue();
    switch (IID) {
    case Intrinsic::vector_reduce_add:
      Acc = Acc + X;
      break;
    case Intrinsic::vector_reduce_mul:
      Acc = Acc * X;
      break;
    case Intrinsic::vector_reduce_and:
      Acc = Acc & X;
      break;
    case Intrinsic::vector_reduce_or:
      Acc = Acc | X;
      break;
    case Intrinsic::vector_reduce_xor:
      Acc = Acc ^ X;
      break;
    case Intrinsic::vector_reduce_smin:
      Acc = APIntOps::smin(Acc, X);
      break;
    case Intrinsic::vector_reduce_smax:
      Acc = APIntOps::smax(Acc, X);
      break;
    case Intrinsic::vector_reduce_umin:
      Acc = APIntOps::umin(Acc, X);
      break;
    case Intrinsic::vector_reduce_umax:
      Acc = APIntOps::umax(Acc, X);
      break;
    }
  }

  return ConstantInt::get(Op->getContext(), Acc);
}
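
// Example (illustrative only): for @llvm.vector.reduce.add.v4i32 applied to
// the constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>, the loop above
// accumulates 1+2+3+4 and the folder returns i32 10.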
/// Attempt to fold an SSE floating point to integer conversion of a constant
/// floating point. If roundTowardZero is false, the default IEEE rounding is
/// used (toward nearest, ties to even). This matches the behavior of the
/// non-truncating SSE instructions in the default rounding mode. The desired
/// integer type Ty is used to select how many bits are available for the
/// result. Returns null if the conversion cannot be performed, otherwise
/// returns the Constant value resulting from the conversion.
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
                                      Type *Ty, bool IsSigned) {
  // All of these conversion intrinsics form an integer of at most 64 bits.
  unsigned ResultWidth = Ty->getIntegerBitWidth();
  assert(ResultWidth <= 64 &&
         "Can only constant fold conversions to 64 and 32 bit ints");

  uint64_t UIntVal;
  bool isExact = false;
  APFloat::roundingMode mode = roundTowardZero ? APFloat::rmTowardZero
                                               : APFloat::rmNearestTiesToEven;
  APFloat::opStatus status =
      Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth,
                           IsSigned, mode, &isExact);
  if (status != APFloat::opOK &&
      (!roundTowardZero || status != APFloat::opInexact))
    return nullptr;
  return ConstantInt::get(Ty, UIntVal, IsSigned);
}
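
// Example (illustrative only): folding cvtss2si(2.7) with the default rounding
// mode uses round-to-nearest-even and yields 3, while the truncating form
// cvttss2si(2.7) uses rmTowardZero and yields 2. An out-of-range input makes
// convertToInteger report a non-OK, non-inexact status, so the fold is
// rejected and nullptr is returned.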
double getValueAsDouble(ConstantFP *Op) {
  Type *Ty = Op->getType();

  if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
    return Op->getValueAPF().convertToDouble();

  bool unused;
  APFloat APF = Op->getValueAPF();
  APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
  return APF.convertToDouble();
}

static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
  if (auto *CI = dyn_cast<ConstantInt>(Op)) {
    C = &CI->getValue();
    return true;
  }
  if (isa<UndefValue>(Op)) {
    C = nullptr;
    return true;
  }
  return false;
}
/// Checks if the given intrinsic call, which evaluates to constant, is allowed
/// to be folded.
///
/// \param CI Constrained intrinsic call.
/// \param St Exception flags raised during constant evaluation.
static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
                               APFloat::opStatus St) {
  Optional<RoundingMode> ORM = CI->getRoundingMode();
  Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();

  // If the operation does not change exception status flags, it is safe
  // to fold.
  if (St == APFloat::opStatus::opOK) {
    // When FP exceptions are not ignored, intrinsic call will not be
    // eliminated, because it is considered as having side effect. But we
    // know that its evaluation does not raise exceptions, so side effect
    // is absent. To allow removing the call, mark it as not accessing memory.
    if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
      CI->addFnAttr(Attribute::ReadNone);
    return true;
  }

  // If evaluation raised FP exception, the result can depend on rounding
  // mode. If the latter is unknown, folding is not possible.
  if (!ORM || *ORM == RoundingMode::Dynamic)
    return false;

  // If FP exceptions are ignored, fold the call, even if such exception is
  // raised.
  if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
    return true;

  // Leave the calculation for runtime so that exception flags be correctly set
  // in hardware.
  return false;
}

/// Returns the rounding mode that should be used for constant evaluation.
static RoundingMode
getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
  Optional<RoundingMode> ORM = CI->getRoundingMode();
  if (!ORM || *ORM == RoundingMode::Dynamic)
    // Even if the rounding mode is unknown, try evaluating the operation.
    // If it does not raise inexact exception, rounding was not applied,
    // so the result is exact and does not depend on rounding mode. Whether
    // other FP exceptions are raised, it does not depend on rounding mode.
    return RoundingMode::NearestTiesToEven;
  return *ORM;
}
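
// Example (illustrative only): @llvm.experimental.constrained.fadd(1.0, 2.0,
// "round.dynamic", "fpexcept.strict") evaluates exactly (opOK), so
// mayFoldConstrained allows the fold; 0.1 + 0.2 under the same metadata
// raises the inexact flag with an unknown rounding mode, so the call is left
// for runtime.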
static Constant *ConstantFoldScalarCall1(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
                                         ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI,
                                         const CallBase *Call) {
  assert(Operands.size() == 1 && "Wrong number of operands.");

  if (IntrinsicID == Intrinsic::is_constant) {
    // We know we have a "Constant" argument. But we want to only
    // return true for manifest constants, not those that depend on
    // constants with unknowable values, e.g. GlobalValue or BlockAddress.
    if (Operands[0]->isManifestConstant())
      return ConstantInt::getTrue(Ty->getContext());
    return nullptr;
  }
  if (isa<UndefValue>(Operands[0])) {
    // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
    // ctpop() is between 0 and bitwidth, pick 0 for undef.
    // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
    if (IntrinsicID == Intrinsic::cos ||
        IntrinsicID == Intrinsic::ctpop ||
        IntrinsicID == Intrinsic::fptoui_sat ||
        IntrinsicID == Intrinsic::fptosi_sat)
      return Constant::getNullValue(Ty);
    if (IntrinsicID == Intrinsic::bswap ||
        IntrinsicID == Intrinsic::bitreverse ||
        IntrinsicID == Intrinsic::launder_invariant_group ||
        IntrinsicID == Intrinsic::strip_invariant_group)
      return Operands[0];
  }

  if (isa<ConstantPointerNull>(Operands[0])) {
    // launder(null) == null == strip(null) iff in addrspace 0
    if (IntrinsicID == Intrinsic::launder_invariant_group ||
        IntrinsicID == Intrinsic::strip_invariant_group) {
      // If instruction is not yet put in a basic block (e.g. when cloning
      // a function during inlining), Call's caller may not be available.
      // So check Call's BB first before querying Call->getCaller.
      const Function *Caller =
          Call->getParent() ? Call->getCaller() : nullptr;
      if (Caller &&
          !NullPointerIsDefined(
              Caller, Operands[0]->getType()->getPointerAddressSpace())) {
        return Operands[0];
      }
      return nullptr;
    }
  }
  if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
    if (IntrinsicID == Intrinsic::convert_to_fp16) {
      APFloat Val(Op->getValueAPF());

      bool lost = false;
      Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);

      return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
    }

    APFloat U = Op->getValueAPF();

    if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
        IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
      bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;

      if (U.isNaN())
        return nullptr;

      unsigned Width = Ty->getIntegerBitWidth();
      APSInt Int(Width, !Signed);
      bool IsExact = false;
      APFloat::opStatus Status =
          U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);

      if (Status == APFloat::opOK || Status == APFloat::opInexact)
        return ConstantInt::get(Ty, Int);

      return nullptr;
    }

    if (IntrinsicID == Intrinsic::fptoui_sat ||
        IntrinsicID == Intrinsic::fptosi_sat) {
      // convertToInteger() already has the desired saturation semantics.
      APSInt Int(Ty->getIntegerBitWidth(),
                 IntrinsicID == Intrinsic::fptoui_sat);
      bool IsExact;
      U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
      return ConstantInt::get(Ty, Int);
    }

    if (IntrinsicID == Intrinsic::isnan)
      return ConstantInt::get(Ty, U.isNaN());

    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
      return nullptr;
    // Use internal versions of these intrinsics.

    if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
      U.roundToIntegral(APFloat::rmNearestTiesToEven);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::round) {
      U.roundToIntegral(APFloat::rmNearestTiesToAway);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::roundeven) {
      U.roundToIntegral(APFloat::rmNearestTiesToEven);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::ceil) {
      U.roundToIntegral(APFloat::rmTowardPositive);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::floor) {
      U.roundToIntegral(APFloat::rmTowardNegative);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::trunc) {
      U.roundToIntegral(APFloat::rmTowardZero);
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::fabs) {
      U.clearSign();
      return ConstantFP::get(Ty->getContext(), U);
    }

    if (IntrinsicID == Intrinsic::amdgcn_fract) {
      // The v_fract instruction behaves like the OpenCL spec, which defines
      // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
      // there to prevent fract(-small) from returning 1.0. It returns the
      // largest positive floating-point number less than 1.0."
      APFloat FloorU(U);
      FloorU.roundToIntegral(APFloat::rmTowardNegative);
      APFloat FractU(U - FloorU);
      APFloat AlmostOne(U.getSemantics(), 1);
      AlmostOne.next(/*nextDown*/ true);
      return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
    }
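
    // Example (illustrative only): @llvm.amdgcn.fract(-0.25) folds to
    // -0.25 - floor(-0.25) = -0.25 - (-1.0) = 0.75, while a tiny negative
    // input such as -1.0e-10f computes a difference that rounds to 1.0 and is
    // therefore clamped to 0x1.fffffep-1, the largest float below 1.0.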
    // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
    // raise FP exceptions, unless the argument is signaling NaN.

    Optional<APFloat::roundingMode> RM;
    switch (IntrinsicID) {
    default:
      break;
    case Intrinsic::experimental_constrained_nearbyint:
    case Intrinsic::experimental_constrained_rint: {
      auto CI = cast<ConstrainedFPIntrinsic>(Call);
      RM = CI->getRoundingMode();
      if (!RM || RM.getValue() == RoundingMode::Dynamic)
        return nullptr;
      break;
    }
    case Intrinsic::experimental_constrained_round:
      RM = APFloat::rmNearestTiesToAway;
      break;
    case Intrinsic::experimental_constrained_ceil:
      RM = APFloat::rmTowardPositive;
      break;
    case Intrinsic::experimental_constrained_floor:
      RM = APFloat::rmTowardNegative;
      break;
    case Intrinsic::experimental_constrained_trunc:
      RM = APFloat::rmTowardZero;
      break;
    }
    if (RM) {
      auto CI = cast<ConstrainedFPIntrinsic>(Call);
      if (U.isFinite()) {
        APFloat::opStatus St = U.roundToIntegral(*RM);
        if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
            St == APFloat::opInexact) {
          Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
          if (EB && *EB == fp::ebStrict)
            return nullptr;
        }
      } else if (U.isSignaling()) {
        Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
        if (EB && *EB != fp::ebIgnore)
          return nullptr;
        U = APFloat::getQNaN(U.getSemantics());
      }
      return ConstantFP::get(Ty->getContext(), U);
    }
    /// We only fold functions with finite arguments. Folding NaN and inf is
    /// likely to be aborted with an exception anyway, and some host libms
    /// have known errors raising exceptions.
    if (!U.isFinite())
      return nullptr;

    /// Currently APFloat versions of these functions do not exist, so we use
    /// the host native double versions. Float versions are not called
    /// directly but for all these it is true (float)(f((double)arg)) ==
    /// f(arg). Long double not supported yet.
    APFloat APF = Op->getValueAPF();

    switch (IntrinsicID) {
    default: break;
    case Intrinsic::log:
      return ConstantFoldFP(log, APF, Ty);
    case Intrinsic::log2:
      // TODO: What about hosts that lack a C99 library?
      return ConstantFoldFP(Log2, APF, Ty);
    case Intrinsic::log10:
      // TODO: What about hosts that lack a C99 library?
      return ConstantFoldFP(log10, APF, Ty);
    case Intrinsic::exp:
      return ConstantFoldFP(exp, APF, Ty);
    case Intrinsic::exp2:
      // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
      return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
    case Intrinsic::sin:
      return ConstantFoldFP(sin, APF, Ty);
    case Intrinsic::cos:
      return ConstantFoldFP(cos, APF, Ty);
    case Intrinsic::sqrt:
      return ConstantFoldFP(sqrt, APF, Ty);
    case Intrinsic::amdgcn_cos:
    case Intrinsic::amdgcn_sin: {
      double V = getValueAsDouble(Op);
      if (V < -256.0 || V > 256.0)
        // The gfx8 and gfx9 architectures handle arguments outside the range
        // [-256, 256] differently. This should be a rare case so bail out
        // rather than trying to handle the difference.
        return nullptr;
      bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
      double V4 = V * 4.0;
      if (V4 == floor(V4)) {
        // Force exact results for quarter-integer inputs.
        const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
        V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
      } else {
        if (IsCos)
          V = cos(V * 2.0 * numbers::pi);
        else
          V = sin(V * 2.0 * numbers::pi);
      }
      return GetConstantFoldFPValue(V, Ty);
    }
    }
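
    // Example (illustrative only): the amdgcn sin/cos argument is in units of
    // full turns, so @llvm.amdgcn.cos(0.25) hits the quarter-integer table
    // above (V4 == 1.0) and folds exactly to SinVals[(1 + 1) & 3] == 0.0
    // instead of going through the host cos().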
    if (!TLI)
      return nullptr;

    LibFunc Func = NotLibFunc;
    TLI->getLibFunc(Name, Func);
    switch (Func) {
    default:
      break;
    case LibFunc_acos:
    case LibFunc_acosf:
    case LibFunc_acos_finite:
    case LibFunc_acosf_finite:
      if (TLI->has(Func))
        return ConstantFoldFP(acos, APF, Ty);
      break;
    case LibFunc_asin:
    case LibFunc_asinf:
    case LibFunc_asin_finite:
    case LibFunc_asinf_finite:
      if (TLI->has(Func))
        return ConstantFoldFP(asin, APF, Ty);
      break;
    case LibFunc_atan:
    case LibFunc_atanf:
      if (TLI->has(Func))
        return ConstantFoldFP(atan, APF, Ty);
      break;
    case LibFunc_ceil:
    case LibFunc_ceilf:
      if (TLI->has(Func)) {
        U.roundToIntegral(APFloat::rmTowardPositive);
        return ConstantFP::get(Ty->getContext(), U);
      }
      break;
    case LibFunc_cos:
    case LibFunc_cosf:
      if (TLI->has(Func))
        return ConstantFoldFP(cos, APF, Ty);
      break;
    case LibFunc_cosh:
    case LibFunc_coshf:
    case LibFunc_cosh_finite:
    case LibFunc_coshf_finite:
      if (TLI->has(Func))
        return ConstantFoldFP(cosh, APF, Ty);
      break;
    case LibFunc_exp:
    case LibFunc_expf:
    case LibFunc_exp_finite:
    case LibFunc_expf_finite:
      if (TLI->has(Func))
        return ConstantFoldFP(exp, APF, Ty);
      break;
    case LibFunc_exp2:
    case LibFunc_exp2f:
    case LibFunc_exp2_finite:
    case LibFunc_exp2f_finite:
      if (TLI->has(Func))
        // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
        return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
      break;
    case LibFunc_fabs:
    case LibFunc_fabsf:
      if (TLI->has(Func)) {
        U.clearSign();
        return ConstantFP::get(Ty->getContext(), U);
      }
      break;
    case LibFunc_floor:
    case LibFunc_floorf:
      if (TLI->has(Func)) {
        U.roundToIntegral(APFloat::rmTowardNegative);
        return ConstantFP::get(Ty->getContext(), U);
      }
      break;
    case LibFunc_log:
    case LibFunc_logf:
    case LibFunc_log_finite:
    case LibFunc_logf_finite:
      if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
        return ConstantFoldFP(log, APF, Ty);
      break;
    case LibFunc_log2:
    case LibFunc_log2f:
    case LibFunc_log2_finite:
    case LibFunc_log2f_finite:
      if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
        // TODO: What about hosts that lack a C99 library?
        return ConstantFoldFP(Log2, APF, Ty);
      break;
    case LibFunc_log10:
    case LibFunc_log10f:
    case LibFunc_log10_finite:
    case LibFunc_log10f_finite:
      if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
        // TODO: What about hosts that lack a C99 library?
        return ConstantFoldFP(log10, APF, Ty);
      break;
    case LibFunc_nearbyint:
    case LibFunc_nearbyintf:
    case LibFunc_rint:
    case LibFunc_rintf:
      if (TLI->has(Func)) {
        U.roundToIntegral(APFloat::rmNearestTiesToEven);
        return ConstantFP::get(Ty->getContext(), U);
      }
      break;
    case LibFunc_round:
    case LibFunc_roundf:
      if (TLI->has(Func)) {
        U.roundToIntegral(APFloat::rmNearestTiesToAway);
        return ConstantFP::get(Ty->getContext(), U);
      }
      break;
    case LibFunc_sin:
    case LibFunc_sinf:
      if (TLI->has(Func))
        return ConstantFoldFP(sin, APF, Ty);
      break;
    case LibFunc_sinh:
    case LibFunc_sinhf:
    case LibFunc_sinh_finite:
    case LibFunc_sinhf_finite:
      if (TLI->has(Func))
        return ConstantFoldFP(sinh, APF, Ty);
      break;
    case LibFunc_sqrt:
    case LibFunc_sqrtf:
      if (!APF.isNegative() && TLI->has(Func))
        return ConstantFoldFP(sqrt, APF, Ty);
      break;
    case LibFunc_tan:
    case LibFunc_tanf:
      if (TLI->has(Func))
        return ConstantFoldFP(tan, APF, Ty);
      break;
    case LibFunc_tanh:
    case LibFunc_tanhf:
      if (TLI->has(Func))
        return ConstantFoldFP(tanh, APF, Ty);
      break;
    case LibFunc_trunc:
    case LibFunc_truncf:
      if (TLI->has(Func)) {
        U.roundToIntegral(APFloat::rmTowardZero);
        return ConstantFP::get(Ty->getContext(), U);
      }
      break;
    }
    return nullptr;
  }
  if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
    switch (IntrinsicID) {
    case Intrinsic::bswap:
      return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
    case Intrinsic::ctpop:
      return ConstantInt::get(Ty, Op->getValue().countPopulation());
    case Intrinsic::bitreverse:
      return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
    case Intrinsic::convert_from_fp16: {
      APFloat Val(APFloat::IEEEhalf(), Op->getValue());

      bool lost = false;
      APFloat::opStatus status = Val.convert(
          Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);

      // Conversion is always precise.
      (void)status;
      assert(status == APFloat::opOK && !lost &&
             "Precision lost during fp16 constfolding");

      return ConstantFP::get(Ty->getContext(), Val);
    }
    default:
      return nullptr;
    }
  }
  switch (IntrinsicID) {
  default: break;
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
    if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
      return C;
    break;
  }

  // Support ConstantVector in case we have an Undef in the top.
  if (isa<ConstantVector>(Operands[0]) ||
      isa<ConstantDataVector>(Operands[0])) {
    auto *Op = cast<Constant>(Operands[0]);
    switch (IntrinsicID) {
    default: break;
    case Intrinsic::x86_sse_cvtss2si:
    case Intrinsic::x86_sse_cvtss2si64:
    case Intrinsic::x86_sse2_cvtsd2si:
    case Intrinsic::x86_sse2_cvtsd2si64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/false, Ty,
                                           /*IsSigned*/true);
      break;
    case Intrinsic::x86_sse_cvttss2si:
    case Intrinsic::x86_sse_cvttss2si64:
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse2_cvttsd2si64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/true, Ty,
                                           /*IsSigned*/true);
      break;
    }
  }

  return nullptr;
}
static Constant *ConstantFoldScalarCall2(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
                                         ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI,
                                         const CallBase *Call) {
  assert(Operands.size() == 2 && "Wrong number of operands.");

  if (Ty->isFloatingPointTy()) {
    // TODO: We should have undef handling for all of the FP intrinsics that
    //       are attempted to be folded in this function.
    bool IsOp0Undef = isa<UndefValue>(Operands[0]);
    bool IsOp1Undef = isa<UndefValue>(Operands[1]);
    switch (IntrinsicID) {
    case Intrinsic::maxnum:
    case Intrinsic::minnum:
    case Intrinsic::maximum:
    case Intrinsic::minimum:
      // If one argument is undef, return the other argument.
      if (IsOp0Undef)
        return Operands[1];
      if (IsOp1Undef)
        return Operands[0];
      break;
    }
  }

  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
    if (!Ty->isFloatingPointTy())
      return nullptr;
    APFloat Op1V = Op1->getValueAPF();

    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
      if (Op2->getType() != Op1->getType())
        return nullptr;
      APFloat Op2V = Op2->getValueAPF();

      if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
        RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
        APFloat Res = Op1V;
        APFloat::opStatus St;
        switch (IntrinsicID) {
        default:
          return nullptr;
        case Intrinsic::experimental_constrained_fadd:
          St = Res.add(Op2V, RM);
          break;
        case Intrinsic::experimental_constrained_fsub:
          St = Res.subtract(Op2V, RM);
          break;
        case Intrinsic::experimental_constrained_fmul:
          St = Res.multiply(Op2V, RM);
          break;
        case Intrinsic::experimental_constrained_fdiv:
          St = Res.divide(Op2V, RM);
          break;
        case Intrinsic::experimental_constrained_frem:
          St = Res.mod(Op2V);
          break;
        }
        if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
                               St))
          return ConstantFP::get(Ty->getContext(), Res);
        return nullptr;
      }

      switch (IntrinsicID) {
      default:
        break;
      case Intrinsic::copysign:
        return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
      case Intrinsic::minnum:
        return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
      case Intrinsic::maxnum:
        return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
      case Intrinsic::minimum:
        return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
      case Intrinsic::maximum:
        return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
      }

      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
        return nullptr;

      switch (IntrinsicID) {
      default:
        break;
      case Intrinsic::pow:
        return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
      case Intrinsic::amdgcn_fmul_legacy:
        // The legacy behaviour is that multiplying +/- 0.0 by anything, even
        // NaN or infinity, gives +0.0.
        if (Op1V.isZero() || Op2V.isZero())
          return ConstantFP::getNullValue(Ty);
        return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
      }

      if (!TLI)
        return nullptr;
      LibFunc Func = NotLibFunc;
      TLI->getLibFunc(Name, Func);
      switch (Func) {
      default:
        break;
      case LibFunc_pow:
      case LibFunc_powf:
      case LibFunc_pow_finite:
      case LibFunc_powf_finite:
        if (TLI->has(Func))
          return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
        break;
      case LibFunc_fmod:
      case LibFunc_fmodf:
        if (TLI->has(Func)) {
          APFloat V = Op1->getValueAPF();
          if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
            return ConstantFP::get(Ty->getContext(), V);
        }
        break;
      case LibFunc_remainder:
      case LibFunc_remainderf:
        if (TLI->has(Func)) {
          APFloat V = Op1->getValueAPF();
          if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
            return ConstantFP::get(Ty->getContext(), V);
        }
        break;
      case LibFunc_atan2:
      case LibFunc_atan2f:
      case LibFunc_atan2_finite:
      case LibFunc_atan2f_finite:
        if (TLI->has(Func))
          return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
        break;
      }
    } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
        return nullptr;
      if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((float)std::pow((float)Op1V.convertToDouble(),
                                    (int)Op2C->getZExtValue())));
      if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((float)std::pow((float)Op1V.convertToDouble(),
                                    (int)Op2C->getZExtValue())));
      if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((double)std::pow(Op1V.convertToDouble(),
                                     (int)Op2C->getZExtValue())));

      if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
        // FIXME: Should flush denorms depending on FP mode, but that's ignored
        // everywhere else.

        // scalbn is equivalent to ldexp with float radix 2
        APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
                                APFloat::rmNearestTiesToEven);
        return ConstantFP::get(Ty->getContext(), Result);
      }
    }
    return nullptr;
  }
  if (Operands[0]->getType()->isIntegerTy() &&
      Operands[1]->getType()->isIntegerTy()) {
    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    unsigned BitWidth = Ty->getScalarSizeInBits();
    switch (IntrinsicID) {
    default: break;
    case Intrinsic::smax:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth));
      return ConstantInt::get(Ty, C0->sgt(*C1) ? *C0 : *C1);

    case Intrinsic::smin:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth));
      return ConstantInt::get(Ty, C0->slt(*C1) ? *C0 : *C1);

    case Intrinsic::umax:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return ConstantInt::get(Ty, APInt::getMaxValue(BitWidth));
      return ConstantInt::get(Ty, C0->ugt(*C1) ? *C0 : *C1);

    case Intrinsic::umin:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return ConstantInt::get(Ty, APInt::getMinValue(BitWidth));
      return ConstantInt::get(Ty, C0->ult(*C1) ? *C0 : *C1);

    case Intrinsic::usub_with_overflow:
    case Intrinsic::ssub_with_overflow:
      // X - undef -> { 0, false }
      // undef - X -> { 0, false }
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);
      LLVM_FALLTHROUGH;
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::sadd_with_overflow:
      // X + undef -> { -1, false }
      // undef + x -> { -1, false }
      if (!C0 || !C1) {
        return ConstantStruct::get(
            cast<StructType>(Ty),
            {Constant::getAllOnesValue(Ty->getStructElementType(0)),
             Constant::getNullValue(Ty->getStructElementType(1))});
      }
      LLVM_FALLTHROUGH;
    case Intrinsic::smul_with_overflow:
    case Intrinsic::umul_with_overflow: {
      // undef * X -> { 0, false }
      // X * undef -> { 0, false }
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);

      APInt Res;
      bool Overflow;
      switch (IntrinsicID) {
      default: llvm_unreachable("Invalid case");
      case Intrinsic::sadd_with_overflow:
        Res = C0->sadd_ov(*C1, Overflow);
        break;
      case Intrinsic::uadd_with_overflow:
        Res = C0->uadd_ov(*C1, Overflow);
        break;
      case Intrinsic::ssub_with_overflow:
        Res = C0->ssub_ov(*C1, Overflow);
        break;
      case Intrinsic::usub_with_overflow:
        Res = C0->usub_ov(*C1, Overflow);
        break;
      case Intrinsic::smul_with_overflow:
        Res = C0->smul_ov(*C1, Overflow);
        break;
      case Intrinsic::umul_with_overflow:
        Res = C0->umul_ov(*C1, Overflow);
        break;
      }
      Constant *Ops[] = {
        ConstantInt::get(Ty->getContext(), Res),
        ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
      };
      return ConstantStruct::get(cast<StructType>(Ty), Ops);
    }
    case Intrinsic::uadd_sat:
    case Intrinsic::sadd_sat:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return Constant::getAllOnesValue(Ty);
      if (IntrinsicID == Intrinsic::uadd_sat)
        return ConstantInt::get(Ty, C0->uadd_sat(*C1));
      else
        return ConstantInt::get(Ty, C0->sadd_sat(*C1));
    case Intrinsic::usub_sat:
    case Intrinsic::ssub_sat:
      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);
      if (IntrinsicID == Intrinsic::usub_sat)
        return ConstantInt::get(Ty, C0->usub_sat(*C1));
      else
        return ConstantInt::get(Ty, C0->ssub_sat(*C1));
    case Intrinsic::cttz:
    case Intrinsic::ctlz:
      assert(C1 && "Must be constant int");

      // cttz(0, 1) and ctlz(0, 1) are undef.
      if (C1->isOneValue() && (!C0 || C0->isNullValue()))
        return UndefValue::get(Ty);
      if (!C0)
        return Constant::getNullValue(Ty);
      if (IntrinsicID == Intrinsic::cttz)
        return ConstantInt::get(Ty, C0->countTrailingZeros());
      else
        return ConstantInt::get(Ty, C0->countLeadingZeros());

    case Intrinsic::abs:
      // Undef or minimum val operand with poison min --> undef
      assert(C1 && "Must be constant int");
      if (C1->isOneValue() && (!C0 || C0->isMinSignedValue()))
        return UndefValue::get(Ty);

      // Undef operand with no poison min --> 0 (sign bit must be clear)
      if (C1->isNullValue() && !C0)
        return Constant::getNullValue(Ty);

      return ConstantInt::get(Ty, C0->abs());
    }

    return nullptr;
  }
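
  // Example (illustrative only): @llvm.uadd.sat.i8(200, 100) folds to 255 via
  // C0->uadd_sat(*C1), and @llvm.sadd.with.overflow.i8(100, 100) folds to the
  // struct { i8 -56, i1 true } because sadd_ov reports signed overflow.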
  // Support ConstantVector in case we have an Undef in the top.
  if ((isa<ConstantVector>(Operands[0]) ||
       isa<ConstantDataVector>(Operands[0])) &&
      // Check for default rounding mode.
      // FIXME: Support other rounding modes?
      isa<ConstantInt>(Operands[1]) &&
      cast<ConstantInt>(Operands[1])->getValue() == 4) {
    auto *Op = cast<Constant>(Operands[0]);
    switch (IntrinsicID) {
    default: break;
    case Intrinsic::x86_avx512_vcvtss2si32:
    case Intrinsic::x86_avx512_vcvtss2si64:
    case Intrinsic::x86_avx512_vcvtsd2si32:
    case Intrinsic::x86_avx512_vcvtsd2si64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/false, Ty,
                                           /*IsSigned*/true);
      break;
    case Intrinsic::x86_avx512_vcvtss2usi32:
    case Intrinsic::x86_avx512_vcvtss2usi64:
    case Intrinsic::x86_avx512_vcvtsd2usi32:
    case Intrinsic::x86_avx512_vcvtsd2usi64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/false, Ty,
                                           /*IsSigned*/false);
      break;
    case Intrinsic::x86_avx512_cvttss2si:
    case Intrinsic::x86_avx512_cvttss2si64:
    case Intrinsic::x86_avx512_cvttsd2si:
    case Intrinsic::x86_avx512_cvttsd2si64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/true, Ty,
                                           /*IsSigned*/true);
      break;
    case Intrinsic::x86_avx512_cvttss2usi:
    case Intrinsic::x86_avx512_cvttss2usi64:
    case Intrinsic::x86_avx512_cvttsd2usi:
    case Intrinsic::x86_avx512_cvttsd2usi64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/true, Ty,
                                           /*IsSigned*/false);
      break;
    }
  }
  return nullptr;
}
static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
                                               const APFloat &S0,
                                               const APFloat &S1,
                                               const APFloat &S2) {
  unsigned ID;
  const fltSemantics &Sem = S0.getSemantics();
  APFloat MA(Sem), SC(Sem), TC(Sem);
  if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
    if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
      // S2 < 0
      ID = 5;
      SC = -S0;
    } else {
      ID = 4;
      SC = S0;
    }
    MA = S2;
    TC = -S1;
  } else if (abs(S1) >= abs(S0)) {
    if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
      // S1 < 0
      ID = 3;
      TC = -S2;
    } else {
      ID = 2;
      TC = S2;
    }
    MA = S1;
    SC = S0;
  } else {
    if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
      // S0 < 0
      ID = 1;
      SC = S2;
    } else {
      ID = 0;
      SC = -S2;
    }
    MA = S0;
    TC = -S1;
  }
  switch (IntrinsicID) {
  default:
    llvm_unreachable("unhandled amdgcn cube intrinsic");
  case Intrinsic::amdgcn_cubeid:
    return APFloat(Sem, ID);
  case Intrinsic::amdgcn_cubema:
    return MA;
  case Intrinsic::amdgcn_cubesc:
    return SC;
  case Intrinsic::amdgcn_cubetc:
    return TC;
  }
}
static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
                                                 Type *Ty) {
  const APInt *C0, *C1, *C2;
  if (!getConstIntOrUndef(Operands[0], C0) ||
      !getConstIntOrUndef(Operands[1], C1) ||
      !getConstIntOrUndef(Operands[2], C2))
    return nullptr;

  if (!C2)
    return UndefValue::get(Ty);

  APInt Val(32, 0);
  unsigned NumUndefBytes = 0;
  for (unsigned I = 0; I < 32; I += 8) {
    unsigned Sel = C2->extractBitsAsZExtValue(8, I);
    unsigned B = 0;

    if (Sel >= 13)
      B = 0xff;
    else if (Sel == 12)
      B = 0x00;
    else {
      const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
      if (!Src)
        ++NumUndefBytes;
      else if (Sel < 8)
        B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
      else
        B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
    }

    Val.insertBits(B, I, 8);
  }

  if (NumUndefBytes == 4)
    return UndefValue::get(Ty);

  return ConstantInt::get(Ty, Val);
}
static Constant *ConstantFoldScalarCall3(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
                                         ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI,
                                         const CallBase *Call) {
  assert(Operands.size() == 3 && "Wrong number of operands.");

  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
      if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
        const APFloat &C1 = Op1->getValueAPF();
        const APFloat &C2 = Op2->getValueAPF();
        const APFloat &C3 = Op3->getValueAPF();

        if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
          APFloat Res = C1;
          APFloat::opStatus St;
          switch (IntrinsicID) {
          default:
            return nullptr;
          case Intrinsic::experimental_constrained_fma:
          case Intrinsic::experimental_constrained_fmuladd:
            St = Res.fusedMultiplyAdd(C2, C3, RM);
            break;
          }
          if (mayFoldConstrained(
                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
            return ConstantFP::get(Ty->getContext(), Res);
          return nullptr;
        }

        switch (IntrinsicID) {
        default: break;
        case Intrinsic::amdgcn_fma_legacy: {
          // The legacy behaviour is that multiplying +/- 0.0 by anything, even
          // NaN or infinity, gives +0.0.
          if (C1.isZero() || C2.isZero()) {
            // It's tempting to just return C3 here, but that would give the
            // wrong result if C3 was -0.0.
            return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
          }
          LLVM_FALLTHROUGH;
        }
        case Intrinsic::fma:
        case Intrinsic::fmuladd: {
          APFloat V = C1;
          V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
          return ConstantFP::get(Ty->getContext(), V);
        }
        case Intrinsic::amdgcn_cubeid:
        case Intrinsic::amdgcn_cubema:
        case Intrinsic::amdgcn_cubesc:
        case Intrinsic::amdgcn_cubetc: {
          APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
          return ConstantFP::get(Ty->getContext(), V);
        }
        }
      }
    }
  }
  if (IntrinsicID == Intrinsic::smul_fix ||
      IntrinsicID == Intrinsic::smul_fix_sat) {
    // poison * C -> poison
    // C * poison -> poison
    if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
      return PoisonValue::get(Ty);

    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    // undef * C -> 0
    // C * undef -> 0
    if (!C0 || !C1)
      return Constant::getNullValue(Ty);

    // This code performs rounding towards negative infinity in case the result
    // cannot be represented exactly for the given scale. Targets that do care
    // about rounding should use a target hook for specifying how rounding
    // should be done, and provide their own folding to be consistent with
    // rounding. This is the same approach as used by
    // DAGTypeLegalizer::ExpandIntRes_MULFIX.
    unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
    unsigned Width = C0->getBitWidth();
    assert(Scale < Width && "Illegal scale.");
    unsigned ExtendedWidth = Width * 2;
    APInt Product = (C0->sextOrSelf(ExtendedWidth) *
                     C1->sextOrSelf(ExtendedWidth)).ashr(Scale);
    if (IntrinsicID == Intrinsic::smul_fix_sat) {
      APInt Max = APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
      APInt Min = APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
      Product = APIntOps::smin(Product, Max);
      Product = APIntOps::smax(Product, Min);
    }
    return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
  }
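
  // Example (illustrative only): @llvm.smul.fix.i32(x, y, 16) treats x and y
  // as Q16.16 values, so 1.5 (0x18000) times 2.5 (0x28000) yields the extended
  // product 0x3C0000000, which, shifted right by the scale of 16, folds to
  // 0x3C000 == 3.75 in Q16.16.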
  if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
    const APInt *C0, *C1, *C2;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1) ||
        !getConstIntOrUndef(Operands[2], C2))
      return nullptr;

    bool IsRight = IntrinsicID == Intrinsic::fshr;
    if (!C2)
      return Operands[IsRight ? 1 : 0];
    if (!C0 && !C1)
      return UndefValue::get(Ty);

    // The shift amount is interpreted as modulo the bitwidth. If the shift
    // amount is effectively 0, avoid UB due to oversized inverse shift below.
    unsigned BitWidth = C2->getBitWidth();
    unsigned ShAmt = C2->urem(BitWidth);
    if (!ShAmt)
      return Operands[IsRight ? 1 : 0];

    // (C0 << ShlAmt) | (C1 >> LshrAmt)
    unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
    unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
    if (!C0)
      return ConstantInt::get(Ty, C1->lshr(LshrAmt));
    if (!C1)
      return ConstantInt::get(Ty, C0->shl(ShlAmt));
    return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
  }
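
  // Example (illustrative only): @llvm.fshl.i8(0x12, 0x34, 4) folds to
  // (0x12 << 4) | (0x34 >> 4) == 0x23. Because the shift amount is taken
  // modulo the bit width, a shift amount of 8 leaves the result equal to the
  // first (fshl) or second (fshr) operand, which the early returns above
  // handle without performing an oversized inverse shift.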
  if (IntrinsicID == Intrinsic::amdgcn_perm)
    return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);

  return nullptr;
}
static Constant *ConstantFoldScalarCall(StringRef Name,
                                        Intrinsic::ID IntrinsicID,
                                        Type *Ty,
                                        ArrayRef<Constant *> Operands,
                                        const TargetLibraryInfo *TLI,
                                        const CallBase *Call) {
  if (Operands.size() == 1)
    return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);

  if (Operands.size() == 2)
    return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call);

  if (Operands.size() == 3)
    return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);

  return nullptr;
}
static Constant *ConstantFoldFixedVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  SmallVector<Constant *, 4> Result(FVTy->getNumElements());
  SmallVector<Constant *, 4> Lane(Operands.size());
  Type *Ty = FVTy->getElementType();

  switch (IntrinsicID) {
  case Intrinsic::masked_load: {
    auto *SrcPtr = Operands[0];
    auto *Mask = Operands[2];
    auto *Passthru = Operands[3];

    Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);

    SmallVector<Constant *, 32> NewElements;
    for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
      auto *MaskElt = Mask->getAggregateElement(I);
      if (!MaskElt)
        break;
      auto *PassthruElt = Passthru->getAggregateElement(I);
      auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
      if (isa<UndefValue>(MaskElt)) {
        if (PassthruElt)
          NewElements.push_back(PassthruElt);
        else if (VecElt)
          NewElements.push_back(VecElt);
        else
          return nullptr;
      }
      if (MaskElt->isNullValue()) {
        if (!PassthruElt)
          return nullptr;
        NewElements.push_back(PassthruElt);
      } else if (MaskElt->isOneValue()) {
        if (!VecElt)
          return nullptr;
        NewElements.push_back(VecElt);
      } else {
        return nullptr;
      }
    }
    if (NewElements.size() != FVTy->getNumElements())
      return nullptr;
    return ConstantVector::get(NewElements);
  }
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64: {
    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Limit = Op->getZExtValue();
      // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make
      // sure we get the limit right in that case and set all relevant lanes.
      if (IntrinsicID == Intrinsic::arm_mve_vctp64)
        Limit *= 2;

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    break;
  }
  case Intrinsic::get_active_lane_mask: {
    auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
    auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
    if (Op0 && Op1) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Base = Op0->getZExtValue();
      uint64_t Limit = Op1->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (Base + i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    break;
  }
  default:
    break;
  }

  for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
    // Gather a column of constants.
    for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
      // Some intrinsics use a scalar type for certain arguments.
      if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) {
        Lane[J] = Operands[J];
        continue;
      }

      Constant *Agg = Operands[J]->getAggregateElement(I);
      if (!Agg)
        return nullptr;

      Lane[J] = Agg;
    }

    // Use the regular scalar folding to simplify this column.
    Constant *Folded =
        ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
    if (!Folded)
      return nullptr;
    Result[I] = Folded;
  }

  return ConstantVector::get(Result);
}
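
// Example (illustrative only): folding @llvm.ctpop.v2i32(<i32 7, i32 255>)
// goes through the generic lane loop above: each lane is folded with
// ConstantFoldScalarCall, giving the vector constant <i32 3, i32 8>.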
static Constant *ConstantFoldScalableVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_convert_from_svbool: {
    auto *Src = dyn_cast<Constant>(Operands[0]);
    if (!Src || !Src->isNullValue())
      break;

    return ConstantInt::getFalse(SVTy);
  }
  default:
    break;
  }
  return nullptr;
}

} // end anonymous namespace
Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                 ArrayRef<Constant *> Operands,
                                 const TargetLibraryInfo *TLI) {
  if (Call->isNoBuiltin())
    return nullptr;
  if (!F->hasName())
    return nullptr;

  // If this is not an intrinsic and not recognized as a library call, bail out.
  if (F->getIntrinsicID() == Intrinsic::not_intrinsic) {
    if (!TLI)
      return nullptr;
    LibFunc LibF;
    if (!TLI->getLibFunc(*F, LibF))
      return nullptr;
  }

  StringRef Name = F->getName();
  Type *Ty = F->getReturnType();
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return ConstantFoldFixedVectorCall(
        Name, F->getIntrinsicID(), FVTy, Operands,
        F->getParent()->getDataLayout(), TLI, Call);

  if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
    return ConstantFoldScalableVectorCall(
        Name, F->getIntrinsicID(), SVTy, Operands,
        F->getParent()->getDataLayout(), TLI, Call);

  // TODO: If this is a library function, we already discovered that above,
  //       so we should pass the LibFunc, not the name (and it might be better
  //       still to separate intrinsic handling from libcalls).
  return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI,
                                Call);
}
bool llvm::isMathLibCallNoop(const CallBase *Call,
                             const TargetLibraryInfo *TLI) {
  // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
  // (and to some extent ConstantFoldScalarCall).
  if (Call->isNoBuiltin() || Call->isStrictFP())
    return false;
  Function *F = Call->getCalledFunction();
  if (!F)
    return false;

  LibFunc Func;
  if (!TLI || !TLI->getLibFunc(*F, Func))
    return false;

  if (Call->getNumArgOperands() == 1) {
    if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
      const APFloat &Op = OpC->getValueAPF();
      switch (Func) {
      case LibFunc_logl:
      case LibFunc_log:
      case LibFunc_logf:
      case LibFunc_log2l:
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log10l:
      case LibFunc_log10:
      case LibFunc_log10f:
        return Op.isNaN() || (!Op.isZero() && !Op.isNegative());

      case LibFunc_expl:
      case LibFunc_exp:
      case LibFunc_expf:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
        break;

      case LibFunc_exp2l:
      case LibFunc_exp2:
      case LibFunc_exp2f:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
        break;

      case LibFunc_sinl:
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_cosl:
      case LibFunc_cos:
      case LibFunc_cosf:
        return !Op.isInfinity();

      case LibFunc_tanl:
      case LibFunc_tan:
      case LibFunc_tanf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = OpC->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
          return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
        break;
      }

      case LibFunc_asinl:
      case LibFunc_asin:
      case LibFunc_asinf:
      case LibFunc_acosl:
      case LibFunc_acos:
      case LibFunc_acosf:
        return !(Op < APFloat(Op.getSemantics(), "-1") ||
                 Op > APFloat(Op.getSemantics(), "1"));

      case LibFunc_sinhl:
      case LibFunc_sinh:
      case LibFunc_sinhf:
      case LibFunc_coshl:
      case LibFunc_cosh:
      case LibFunc_coshf:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
        break;

      case LibFunc_sqrtl:
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
        return Op.isNaN() || Op.isZero() || !Op.isNegative();

      // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
      // maybe others?
      default:
        break;
      }
    }
  }

  if (Call->getNumArgOperands() == 2) {
    ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
    ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
    if (Op0C && Op1C) {
      const APFloat &Op0 = Op0C->getValueAPF();
      const APFloat &Op1 = Op1C->getValueAPF();

      switch (Func) {
      case LibFunc_powl:
      case LibFunc_pow:
      case LibFunc_powf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = Op0C->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
          if (Ty == Op1C->getType())
            return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
        }
        break;
      }

      case LibFunc_fmodl:
      case LibFunc_fmod:
      case LibFunc_fmodf:
      case LibFunc_remainderl:
      case LibFunc_remainder:
      case LibFunc_remainderf:
        return Op0.isNaN() || Op1.isNaN() ||
               (!Op0.isInfinity() && !Op1.isZero());

      default:
        break;
      }
    }
  }

  return false;
}

void TargetFolder::anchor() {}