1 //===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the library calls simplifier. It does not implement
10 // any pass, but can be used by other passes to do simplifications.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/ConstantFolding.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Transforms/Utils/Local.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/Analysis/CaptureTracking.h"
27 #include "llvm/Analysis/Loads.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/IR/Intrinsics.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/IR/Module.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/KnownBits.h"
38 #include "llvm/Transforms/Utils/BuildLibCalls.h"
39 #include "llvm/Transforms/Utils/SizeOpts.h"
42 using namespace PatternMatch
;
45 EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden
,
47 cl::desc("Enable unsafe double to float "
48 "shrinking for math lib calls"));
51 //===----------------------------------------------------------------------===//
53 //===----------------------------------------------------------------------===//
55 static bool ignoreCallingConv(LibFunc Func
) {
56 return Func
== LibFunc_abs
|| Func
== LibFunc_labs
||
57 Func
== LibFunc_llabs
|| Func
== LibFunc_strlen
;
60 static bool isCallingConvCCompatible(CallInst
*CI
) {
61 switch(CI
->getCallingConv()) {
64 case llvm::CallingConv::C
:
66 case llvm::CallingConv::ARM_APCS
:
67 case llvm::CallingConv::ARM_AAPCS
:
68 case llvm::CallingConv::ARM_AAPCS_VFP
: {
70 // The iOS ABI diverges from the standard in some cases, so for now don't
71 // try to simplify those calls.
72 if (Triple(CI
->getModule()->getTargetTriple()).isiOS())
75 auto *FuncTy
= CI
->getFunctionType();
77 if (!FuncTy
->getReturnType()->isPointerTy() &&
78 !FuncTy
->getReturnType()->isIntegerTy() &&
79 !FuncTy
->getReturnType()->isVoidTy())
82 for (auto Param
: FuncTy
->params()) {
83 if (!Param
->isPointerTy() && !Param
->isIntegerTy())
92 /// Return true if it is only used in equality comparisons with With.
93 static bool isOnlyUsedInEqualityComparison(Value
*V
, Value
*With
) {
94 for (User
*U
: V
->users()) {
95 if (ICmpInst
*IC
= dyn_cast
<ICmpInst
>(U
))
96 if (IC
->isEquality() && IC
->getOperand(1) == With
)
98 // Unknown instruction.
104 static bool callHasFloatingPointArgument(const CallInst
*CI
) {
105 return any_of(CI
->operands(), [](const Use
&OI
) {
106 return OI
->getType()->isFloatingPointTy();
110 static bool callHasFP128Argument(const CallInst
*CI
) {
111 return any_of(CI
->operands(), [](const Use
&OI
) {
112 return OI
->getType()->isFP128Ty();
116 static Value
*convertStrToNumber(CallInst
*CI
, StringRef
&Str
, int64_t Base
) {
117 if (Base
< 2 || Base
> 36)
118 // handle special zero base
123 std::string nptr
= Str
.str();
125 long long int Result
= strtoll(nptr
.c_str(), &End
, Base
);
129 // if we assume all possible target locales are ASCII supersets,
130 // then if strtoll successfully parses a number on the host,
131 // it will also successfully parse the same way on the target
135 if (!isIntN(CI
->getType()->getPrimitiveSizeInBits(), Result
))
138 return ConstantInt::get(CI
->getType(), Result
);
141 static bool isLocallyOpenedFile(Value
*File
, CallInst
*CI
, IRBuilder
<> &B
,
142 const TargetLibraryInfo
*TLI
) {
143 CallInst
*FOpen
= dyn_cast
<CallInst
>(File
);
147 Function
*InnerCallee
= FOpen
->getCalledFunction();
152 if (!TLI
->getLibFunc(*InnerCallee
, Func
) || !TLI
->has(Func
) ||
153 Func
!= LibFunc_fopen
)
156 inferLibFuncAttributes(*CI
->getCalledFunction(), *TLI
);
157 if (PointerMayBeCaptured(File
, true, true))
163 static bool isOnlyUsedInComparisonWithZero(Value
*V
) {
164 for (User
*U
: V
->users()) {
165 if (ICmpInst
*IC
= dyn_cast
<ICmpInst
>(U
))
166 if (Constant
*C
= dyn_cast
<Constant
>(IC
->getOperand(1)))
167 if (C
->isNullValue())
169 // Unknown instruction.
175 static bool canTransformToMemCmp(CallInst
*CI
, Value
*Str
, uint64_t Len
,
176 const DataLayout
&DL
) {
177 if (!isOnlyUsedInComparisonWithZero(CI
))
180 if (!isDereferenceableAndAlignedPointer(Str
, 1, APInt(64, Len
), DL
))
183 if (CI
->getFunction()->hasFnAttribute(Attribute::SanitizeMemory
))
189 //===----------------------------------------------------------------------===//
190 // String and Memory Library Call Optimizations
191 //===----------------------------------------------------------------------===//
193 Value
*LibCallSimplifier::optimizeStrCat(CallInst
*CI
, IRBuilder
<> &B
) {
194 // Extract some information from the instruction
195 Value
*Dst
= CI
->getArgOperand(0);
196 Value
*Src
= CI
->getArgOperand(1);
198 // See if we can get the length of the input string.
199 uint64_t Len
= GetStringLength(Src
);
202 --Len
; // Unbias length.
204 // Handle the simple, do-nothing case: strcat(x, "") -> x
208 return emitStrLenMemCpy(Src
, Dst
, Len
, B
);
211 Value
*LibCallSimplifier::emitStrLenMemCpy(Value
*Src
, Value
*Dst
, uint64_t Len
,
213 // We need to find the end of the destination string. That's where the
214 // memory is to be moved to. We just generate a call to strlen.
215 Value
*DstLen
= emitStrLen(Dst
, B
, DL
, TLI
);
219 // Now that we have the destination's length, we must index into the
220 // destination's pointer to get the actual memcpy destination (end of
221 // the string .. we're concatenating).
222 Value
*CpyDst
= B
.CreateGEP(B
.getInt8Ty(), Dst
, DstLen
, "endptr");
224 // We have enough information to now generate the memcpy call to do the
225 // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
226 B
.CreateMemCpy(CpyDst
, 1, Src
, 1,
227 ConstantInt::get(DL
.getIntPtrType(Src
->getContext()), Len
+ 1));
231 Value
*LibCallSimplifier::optimizeStrNCat(CallInst
*CI
, IRBuilder
<> &B
) {
232 // Extract some information from the instruction.
233 Value
*Dst
= CI
->getArgOperand(0);
234 Value
*Src
= CI
->getArgOperand(1);
237 // We don't do anything if length is not constant.
238 if (ConstantInt
*LengthArg
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(2)))
239 Len
= LengthArg
->getZExtValue();
243 // See if we can get the length of the input string.
244 uint64_t SrcLen
= GetStringLength(Src
);
247 --SrcLen
; // Unbias length.
249 // Handle the simple, do-nothing cases:
250 // strncat(x, "", c) -> x
251 // strncat(x, c, 0) -> x
252 if (SrcLen
== 0 || Len
== 0)
255 // We don't optimize this case.
259 // strncat(x, s, c) -> strcat(x, s)
260 // s is constant so the strcat can be optimized further.
261 return emitStrLenMemCpy(Src
, Dst
, SrcLen
, B
);
264 Value
*LibCallSimplifier::optimizeStrChr(CallInst
*CI
, IRBuilder
<> &B
) {
265 Function
*Callee
= CI
->getCalledFunction();
266 FunctionType
*FT
= Callee
->getFunctionType();
267 Value
*SrcStr
= CI
->getArgOperand(0);
269 // If the second operand is non-constant, see if we can compute the length
270 // of the input string and turn this into memchr.
271 ConstantInt
*CharC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(1));
273 uint64_t Len
= GetStringLength(SrcStr
);
274 if (Len
== 0 || !FT
->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
277 return emitMemChr(SrcStr
, CI
->getArgOperand(1), // include nul.
278 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()), Len
),
282 // Otherwise, the character is a constant, see if the first argument is
283 // a string literal. If so, we can constant fold.
285 if (!getConstantStringInfo(SrcStr
, Str
)) {
286 if (CharC
->isZero()) // strchr(p, 0) -> p + strlen(p)
287 return B
.CreateGEP(B
.getInt8Ty(), SrcStr
, emitStrLen(SrcStr
, B
, DL
, TLI
),
292 // Compute the offset, make sure to handle the case when we're searching for
293 // zero (a weird way to spell strlen).
294 size_t I
= (0xFF & CharC
->getSExtValue()) == 0
296 : Str
.find(CharC
->getSExtValue());
297 if (I
== StringRef::npos
) // Didn't find the char. strchr returns null.
298 return Constant::getNullValue(CI
->getType());
300 // strchr(s+n,c) -> gep(s+n+i,c)
301 return B
.CreateGEP(B
.getInt8Ty(), SrcStr
, B
.getInt64(I
), "strchr");
304 Value
*LibCallSimplifier::optimizeStrRChr(CallInst
*CI
, IRBuilder
<> &B
) {
305 Value
*SrcStr
= CI
->getArgOperand(0);
306 ConstantInt
*CharC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(1));
308 // Cannot fold anything if we're not looking for a constant.
313 if (!getConstantStringInfo(SrcStr
, Str
)) {
314 // strrchr(s, 0) -> strchr(s, 0)
316 return emitStrChr(SrcStr
, '\0', B
, TLI
);
320 // Compute the offset.
321 size_t I
= (0xFF & CharC
->getSExtValue()) == 0
323 : Str
.rfind(CharC
->getSExtValue());
324 if (I
== StringRef::npos
) // Didn't find the char. Return null.
325 return Constant::getNullValue(CI
->getType());
327 // strrchr(s+n,c) -> gep(s+n+i,c)
328 return B
.CreateGEP(B
.getInt8Ty(), SrcStr
, B
.getInt64(I
), "strrchr");
331 Value
*LibCallSimplifier::optimizeStrCmp(CallInst
*CI
, IRBuilder
<> &B
) {
332 Value
*Str1P
= CI
->getArgOperand(0), *Str2P
= CI
->getArgOperand(1);
333 if (Str1P
== Str2P
) // strcmp(x,x) -> 0
334 return ConstantInt::get(CI
->getType(), 0);
336 StringRef Str1
, Str2
;
337 bool HasStr1
= getConstantStringInfo(Str1P
, Str1
);
338 bool HasStr2
= getConstantStringInfo(Str2P
, Str2
);
340 // strcmp(x, y) -> cnst (if both x and y are constant strings)
341 if (HasStr1
&& HasStr2
)
342 return ConstantInt::get(CI
->getType(), Str1
.compare(Str2
));
344 if (HasStr1
&& Str1
.empty()) // strcmp("", x) -> -*x
345 return B
.CreateNeg(B
.CreateZExt(
346 B
.CreateLoad(B
.getInt8Ty(), Str2P
, "strcmpload"), CI
->getType()));
348 if (HasStr2
&& Str2
.empty()) // strcmp(x,"") -> *x
349 return B
.CreateZExt(B
.CreateLoad(B
.getInt8Ty(), Str1P
, "strcmpload"),
352 // strcmp(P, "x") -> memcmp(P, "x", 2)
353 uint64_t Len1
= GetStringLength(Str1P
);
354 uint64_t Len2
= GetStringLength(Str2P
);
356 return emitMemCmp(Str1P
, Str2P
,
357 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()),
358 std::min(Len1
, Len2
)),
363 if (!HasStr1
&& HasStr2
) {
364 if (canTransformToMemCmp(CI
, Str1P
, Len2
, DL
))
367 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()), Len2
), B
, DL
,
369 } else if (HasStr1
&& !HasStr2
) {
370 if (canTransformToMemCmp(CI
, Str2P
, Len1
, DL
))
373 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()), Len1
), B
, DL
,
380 Value
*LibCallSimplifier::optimizeStrNCmp(CallInst
*CI
, IRBuilder
<> &B
) {
381 Value
*Str1P
= CI
->getArgOperand(0), *Str2P
= CI
->getArgOperand(1);
382 if (Str1P
== Str2P
) // strncmp(x,x,n) -> 0
383 return ConstantInt::get(CI
->getType(), 0);
385 // Get the length argument if it is constant.
387 if (ConstantInt
*LengthArg
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(2)))
388 Length
= LengthArg
->getZExtValue();
392 if (Length
== 0) // strncmp(x,y,0) -> 0
393 return ConstantInt::get(CI
->getType(), 0);
395 if (Length
== 1) // strncmp(x,y,1) -> memcmp(x,y,1)
396 return emitMemCmp(Str1P
, Str2P
, CI
->getArgOperand(2), B
, DL
, TLI
);
398 StringRef Str1
, Str2
;
399 bool HasStr1
= getConstantStringInfo(Str1P
, Str1
);
400 bool HasStr2
= getConstantStringInfo(Str2P
, Str2
);
402 // strncmp(x, y) -> cnst (if both x and y are constant strings)
403 if (HasStr1
&& HasStr2
) {
404 StringRef SubStr1
= Str1
.substr(0, Length
);
405 StringRef SubStr2
= Str2
.substr(0, Length
);
406 return ConstantInt::get(CI
->getType(), SubStr1
.compare(SubStr2
));
409 if (HasStr1
&& Str1
.empty()) // strncmp("", x, n) -> -*x
410 return B
.CreateNeg(B
.CreateZExt(
411 B
.CreateLoad(B
.getInt8Ty(), Str2P
, "strcmpload"), CI
->getType()));
413 if (HasStr2
&& Str2
.empty()) // strncmp(x, "", n) -> *x
414 return B
.CreateZExt(B
.CreateLoad(B
.getInt8Ty(), Str1P
, "strcmpload"),
417 uint64_t Len1
= GetStringLength(Str1P
);
418 uint64_t Len2
= GetStringLength(Str2P
);
421 if (!HasStr1
&& HasStr2
) {
422 Len2
= std::min(Len2
, Length
);
423 if (canTransformToMemCmp(CI
, Str1P
, Len2
, DL
))
426 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()), Len2
), B
, DL
,
428 } else if (HasStr1
&& !HasStr2
) {
429 Len1
= std::min(Len1
, Length
);
430 if (canTransformToMemCmp(CI
, Str2P
, Len1
, DL
))
433 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()), Len1
), B
, DL
,
440 Value
*LibCallSimplifier::optimizeStrCpy(CallInst
*CI
, IRBuilder
<> &B
) {
441 Value
*Dst
= CI
->getArgOperand(0), *Src
= CI
->getArgOperand(1);
442 if (Dst
== Src
) // strcpy(x,x) -> x
445 // See if we can get the length of the input string.
446 uint64_t Len
= GetStringLength(Src
);
450 // We have enough information to now generate the memcpy call to do the
451 // copy for us. Make a memcpy to copy the nul byte with align = 1.
452 B
.CreateMemCpy(Dst
, 1, Src
, 1,
453 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()), Len
));
457 Value
*LibCallSimplifier::optimizeStpCpy(CallInst
*CI
, IRBuilder
<> &B
) {
458 Function
*Callee
= CI
->getCalledFunction();
459 Value
*Dst
= CI
->getArgOperand(0), *Src
= CI
->getArgOperand(1);
460 if (Dst
== Src
) { // stpcpy(x,x) -> x+strlen(x)
461 Value
*StrLen
= emitStrLen(Src
, B
, DL
, TLI
);
462 return StrLen
? B
.CreateInBoundsGEP(B
.getInt8Ty(), Dst
, StrLen
) : nullptr;
465 // See if we can get the length of the input string.
466 uint64_t Len
= GetStringLength(Src
);
470 Type
*PT
= Callee
->getFunctionType()->getParamType(0);
471 Value
*LenV
= ConstantInt::get(DL
.getIntPtrType(PT
), Len
);
472 Value
*DstEnd
= B
.CreateGEP(B
.getInt8Ty(), Dst
,
473 ConstantInt::get(DL
.getIntPtrType(PT
), Len
- 1));
475 // We have enough information to now generate the memcpy call to do the
476 // copy for us. Make a memcpy to copy the nul byte with align = 1.
477 B
.CreateMemCpy(Dst
, 1, Src
, 1, LenV
);
481 Value
*LibCallSimplifier::optimizeStrNCpy(CallInst
*CI
, IRBuilder
<> &B
) {
482 Function
*Callee
= CI
->getCalledFunction();
483 Value
*Dst
= CI
->getArgOperand(0);
484 Value
*Src
= CI
->getArgOperand(1);
485 Value
*LenOp
= CI
->getArgOperand(2);
487 // See if we can get the length of the input string.
488 uint64_t SrcLen
= GetStringLength(Src
);
494 // strncpy(x, "", y) -> memset(align 1 x, '\0', y)
495 B
.CreateMemSet(Dst
, B
.getInt8('\0'), LenOp
, 1);
500 if (ConstantInt
*LengthArg
= dyn_cast
<ConstantInt
>(LenOp
))
501 Len
= LengthArg
->getZExtValue();
506 return Dst
; // strncpy(x, y, 0) -> x
508 // Let strncpy handle the zero padding
509 if (Len
> SrcLen
+ 1)
512 Type
*PT
= Callee
->getFunctionType()->getParamType(0);
513 // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
514 B
.CreateMemCpy(Dst
, 1, Src
, 1, ConstantInt::get(DL
.getIntPtrType(PT
), Len
));
519 Value
*LibCallSimplifier::optimizeStringLength(CallInst
*CI
, IRBuilder
<> &B
,
521 Value
*Src
= CI
->getArgOperand(0);
523 // Constant folding: strlen("xyz") -> 3
524 if (uint64_t Len
= GetStringLength(Src
, CharSize
))
525 return ConstantInt::get(CI
->getType(), Len
- 1);
527 // If s is a constant pointer pointing to a string literal, we can fold
528 // strlen(s + x) to strlen(s) - x, when x is known to be in the range
529 // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
530 // We only try to simplify strlen when the pointer s points to an array
531 // of i8. Otherwise, we would need to scale the offset x before doing the
532 // subtraction. This will make the optimization more complex, and it's not
533 // very useful because calling strlen for a pointer of other types is
535 if (GEPOperator
*GEP
= dyn_cast
<GEPOperator
>(Src
)) {
536 if (!isGEPBasedOnPointerToString(GEP
, CharSize
))
539 ConstantDataArraySlice Slice
;
540 if (getConstantDataArrayInfo(GEP
->getOperand(0), Slice
, CharSize
)) {
541 uint64_t NullTermIdx
;
542 if (Slice
.Array
== nullptr) {
545 NullTermIdx
= ~((uint64_t)0);
546 for (uint64_t I
= 0, E
= Slice
.Length
; I
< E
; ++I
) {
547 if (Slice
.Array
->getElementAsInteger(I
+ Slice
.Offset
) == 0) {
552 // If the string does not have '\0', leave it to strlen to compute
554 if (NullTermIdx
== ~((uint64_t)0))
558 Value
*Offset
= GEP
->getOperand(2);
559 KnownBits Known
= computeKnownBits(Offset
, DL
, 0, nullptr, CI
, nullptr);
560 Known
.Zero
.flipAllBits();
562 cast
<ArrayType
>(GEP
->getSourceElementType())->getNumElements();
564 // KnownZero's bits are flipped, so zeros in KnownZero now represent
565 // bits known to be zeros in Offset, and ones in KnownZero represent
566 // bits unknown in Offset. Therefore, Offset is known to be in range
567 // [0, NullTermIdx] when the flipped KnownZero is non-negative and
568 // unsigned-less-than-or-equal-to NullTermIdx.
570 // If Offset is not provably in the range [0, NullTermIdx], we can still
571 // optimize if we can prove that the program has undefined behavior when
572 // Offset is outside that range. That is the case when GEP->getOperand(0)
573 // is a pointer to an object whose memory extent is NullTermIdx+1.
574 if ((Known
.Zero
.isNonNegative() && Known
.Zero
.ule(NullTermIdx
)) ||
575 (GEP
->isInBounds() && isa
<GlobalVariable
>(GEP
->getOperand(0)) &&
576 NullTermIdx
== ArrSize
- 1)) {
577 Offset
= B
.CreateSExtOrTrunc(Offset
, CI
->getType());
578 return B
.CreateSub(ConstantInt::get(CI
->getType(), NullTermIdx
),
586 // strlen(x?"foo":"bars") --> x ? 3 : 4
587 if (SelectInst
*SI
= dyn_cast
<SelectInst
>(Src
)) {
588 uint64_t LenTrue
= GetStringLength(SI
->getTrueValue(), CharSize
);
589 uint64_t LenFalse
= GetStringLength(SI
->getFalseValue(), CharSize
);
590 if (LenTrue
&& LenFalse
) {
592 return OptimizationRemark("instcombine", "simplify-libcalls", CI
)
593 << "folded strlen(select) to select of constants";
595 return B
.CreateSelect(SI
->getCondition(),
596 ConstantInt::get(CI
->getType(), LenTrue
- 1),
597 ConstantInt::get(CI
->getType(), LenFalse
- 1));
601 // strlen(x) != 0 --> *x != 0
602 // strlen(x) == 0 --> *x == 0
603 if (isOnlyUsedInZeroEqualityComparison(CI
))
604 return B
.CreateZExt(B
.CreateLoad(B
.getIntNTy(CharSize
), Src
, "strlenfirst"),
610 Value
*LibCallSimplifier::optimizeStrLen(CallInst
*CI
, IRBuilder
<> &B
) {
611 return optimizeStringLength(CI
, B
, 8);
614 Value
*LibCallSimplifier::optimizeWcslen(CallInst
*CI
, IRBuilder
<> &B
) {
615 Module
&M
= *CI
->getModule();
616 unsigned WCharSize
= TLI
->getWCharSize(M
) * 8;
617 // We cannot perform this optimization without wchar_size metadata.
621 return optimizeStringLength(CI
, B
, WCharSize
);
624 Value
*LibCallSimplifier::optimizeStrPBrk(CallInst
*CI
, IRBuilder
<> &B
) {
626 bool HasS1
= getConstantStringInfo(CI
->getArgOperand(0), S1
);
627 bool HasS2
= getConstantStringInfo(CI
->getArgOperand(1), S2
);
629 // strpbrk(s, "") -> nullptr
630 // strpbrk("", s) -> nullptr
631 if ((HasS1
&& S1
.empty()) || (HasS2
&& S2
.empty()))
632 return Constant::getNullValue(CI
->getType());
635 if (HasS1
&& HasS2
) {
636 size_t I
= S1
.find_first_of(S2
);
637 if (I
== StringRef::npos
) // No match.
638 return Constant::getNullValue(CI
->getType());
640 return B
.CreateGEP(B
.getInt8Ty(), CI
->getArgOperand(0), B
.getInt64(I
),
644 // strpbrk(s, "a") -> strchr(s, 'a')
645 if (HasS2
&& S2
.size() == 1)
646 return emitStrChr(CI
->getArgOperand(0), S2
[0], B
, TLI
);
651 Value
*LibCallSimplifier::optimizeStrTo(CallInst
*CI
, IRBuilder
<> &B
) {
652 Value
*EndPtr
= CI
->getArgOperand(1);
653 if (isa
<ConstantPointerNull
>(EndPtr
)) {
654 // With a null EndPtr, this function won't capture the main argument.
655 // It would be readonly too, except that it still may write to errno.
656 CI
->addParamAttr(0, Attribute::NoCapture
);
662 Value
*LibCallSimplifier::optimizeStrSpn(CallInst
*CI
, IRBuilder
<> &B
) {
664 bool HasS1
= getConstantStringInfo(CI
->getArgOperand(0), S1
);
665 bool HasS2
= getConstantStringInfo(CI
->getArgOperand(1), S2
);
667 // strspn(s, "") -> 0
668 // strspn("", s) -> 0
669 if ((HasS1
&& S1
.empty()) || (HasS2
&& S2
.empty()))
670 return Constant::getNullValue(CI
->getType());
673 if (HasS1
&& HasS2
) {
674 size_t Pos
= S1
.find_first_not_of(S2
);
675 if (Pos
== StringRef::npos
)
677 return ConstantInt::get(CI
->getType(), Pos
);
683 Value
*LibCallSimplifier::optimizeStrCSpn(CallInst
*CI
, IRBuilder
<> &B
) {
685 bool HasS1
= getConstantStringInfo(CI
->getArgOperand(0), S1
);
686 bool HasS2
= getConstantStringInfo(CI
->getArgOperand(1), S2
);
688 // strcspn("", s) -> 0
689 if (HasS1
&& S1
.empty())
690 return Constant::getNullValue(CI
->getType());
693 if (HasS1
&& HasS2
) {
694 size_t Pos
= S1
.find_first_of(S2
);
695 if (Pos
== StringRef::npos
)
697 return ConstantInt::get(CI
->getType(), Pos
);
700 // strcspn(s, "") -> strlen(s)
701 if (HasS2
&& S2
.empty())
702 return emitStrLen(CI
->getArgOperand(0), B
, DL
, TLI
);
707 Value
*LibCallSimplifier::optimizeStrStr(CallInst
*CI
, IRBuilder
<> &B
) {
708 // fold strstr(x, x) -> x.
709 if (CI
->getArgOperand(0) == CI
->getArgOperand(1))
710 return B
.CreateBitCast(CI
->getArgOperand(0), CI
->getType());
712 // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
713 if (isOnlyUsedInEqualityComparison(CI
, CI
->getArgOperand(0))) {
714 Value
*StrLen
= emitStrLen(CI
->getArgOperand(1), B
, DL
, TLI
);
717 Value
*StrNCmp
= emitStrNCmp(CI
->getArgOperand(0), CI
->getArgOperand(1),
721 for (auto UI
= CI
->user_begin(), UE
= CI
->user_end(); UI
!= UE
;) {
722 ICmpInst
*Old
= cast
<ICmpInst
>(*UI
++);
724 B
.CreateICmp(Old
->getPredicate(), StrNCmp
,
725 ConstantInt::getNullValue(StrNCmp
->getType()), "cmp");
726 replaceAllUsesWith(Old
, Cmp
);
731 // See if either input string is a constant string.
732 StringRef SearchStr
, ToFindStr
;
733 bool HasStr1
= getConstantStringInfo(CI
->getArgOperand(0), SearchStr
);
734 bool HasStr2
= getConstantStringInfo(CI
->getArgOperand(1), ToFindStr
);
736 // fold strstr(x, "") -> x.
737 if (HasStr2
&& ToFindStr
.empty())
738 return B
.CreateBitCast(CI
->getArgOperand(0), CI
->getType());
740 // If both strings are known, constant fold it.
741 if (HasStr1
&& HasStr2
) {
742 size_t Offset
= SearchStr
.find(ToFindStr
);
744 if (Offset
== StringRef::npos
) // strstr("foo", "bar") -> null
745 return Constant::getNullValue(CI
->getType());
747 // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
748 Value
*Result
= castToCStr(CI
->getArgOperand(0), B
);
750 B
.CreateConstInBoundsGEP1_64(B
.getInt8Ty(), Result
, Offset
, "strstr");
751 return B
.CreateBitCast(Result
, CI
->getType());
754 // fold strstr(x, "y") -> strchr(x, 'y').
755 if (HasStr2
&& ToFindStr
.size() == 1) {
756 Value
*StrChr
= emitStrChr(CI
->getArgOperand(0), ToFindStr
[0], B
, TLI
);
757 return StrChr
? B
.CreateBitCast(StrChr
, CI
->getType()) : nullptr;
762 Value
*LibCallSimplifier::optimizeMemChr(CallInst
*CI
, IRBuilder
<> &B
) {
763 Value
*SrcStr
= CI
->getArgOperand(0);
764 ConstantInt
*CharC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(1));
765 ConstantInt
*LenC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(2));
767 // memchr(x, y, 0) -> null
768 if (LenC
&& LenC
->isZero())
769 return Constant::getNullValue(CI
->getType());
771 // From now on we need at least constant length and string.
773 if (!LenC
|| !getConstantStringInfo(SrcStr
, Str
, 0, /*TrimAtNul=*/false))
776 // Truncate the string to LenC. If Str is smaller than LenC we will still only
777 // scan the string, as reading past the end of it is undefined and we can just
778 // return null if we don't find the char.
779 Str
= Str
.substr(0, LenC
->getZExtValue());
781 // If the char is variable but the input str and length are not we can turn
782 // this memchr call into a simple bit field test. Of course this only works
783 // when the return value is only checked against null.
785 // It would be really nice to reuse switch lowering here but we can't change
786 // the CFG at this point.
788 // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
790 // after bounds check.
791 if (!CharC
&& !Str
.empty() && isOnlyUsedInZeroEqualityComparison(CI
)) {
793 *std::max_element(reinterpret_cast<const unsigned char *>(Str
.begin()),
794 reinterpret_cast<const unsigned char *>(Str
.end()));
796 // Make sure the bit field we're about to create fits in a register on the
798 // FIXME: On a 64 bit architecture this prevents us from using the
799 // interesting range of alpha ascii chars. We could do better by emitting
800 // two bitfields or shifting the range by 64 if no lower chars are used.
801 if (!DL
.fitsInLegalInteger(Max
+ 1))
804 // For the bit field use a power-of-2 type with at least 8 bits to avoid
805 // creating unnecessary illegal types.
806 unsigned char Width
= NextPowerOf2(std::max((unsigned char)7, Max
));
808 // Now build the bit field.
809 APInt
Bitfield(Width
, 0);
811 Bitfield
.setBit((unsigned char)C
);
812 Value
*BitfieldC
= B
.getInt(Bitfield
);
814 // Adjust width of "C" to the bitfield width, then mask off the high bits.
815 Value
*C
= B
.CreateZExtOrTrunc(CI
->getArgOperand(1), BitfieldC
->getType());
816 C
= B
.CreateAnd(C
, B
.getIntN(Width
, 0xFF));
818 // First check that the bit field access is within bounds.
819 Value
*Bounds
= B
.CreateICmp(ICmpInst::ICMP_ULT
, C
, B
.getIntN(Width
, Width
),
822 // Create code that checks if the given bit is set in the field.
823 Value
*Shl
= B
.CreateShl(B
.getIntN(Width
, 1ULL), C
);
824 Value
*Bits
= B
.CreateIsNotNull(B
.CreateAnd(Shl
, BitfieldC
), "memchr.bits");
826 // Finally merge both checks and cast to pointer type. The inttoptr
827 // implicitly zexts the i1 to intptr type.
828 return B
.CreateIntToPtr(B
.CreateAnd(Bounds
, Bits
, "memchr"), CI
->getType());
831 // Check if all arguments are constants. If so, we can constant fold.
835 // Compute the offset.
836 size_t I
= Str
.find(CharC
->getSExtValue() & 0xFF);
837 if (I
== StringRef::npos
) // Didn't find the char. memchr returns null.
838 return Constant::getNullValue(CI
->getType());
840 // memchr(s+n,c,l) -> gep(s+n+i,c)
841 return B
.CreateGEP(B
.getInt8Ty(), SrcStr
, B
.getInt64(I
), "memchr");
844 static Value
*optimizeMemCmpConstantSize(CallInst
*CI
, Value
*LHS
, Value
*RHS
,
845 uint64_t Len
, IRBuilder
<> &B
,
846 const DataLayout
&DL
) {
847 if (Len
== 0) // memcmp(s1,s2,0) -> 0
848 return Constant::getNullValue(CI
->getType());
850 // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
853 B
.CreateZExt(B
.CreateLoad(B
.getInt8Ty(), castToCStr(LHS
, B
), "lhsc"),
854 CI
->getType(), "lhsv");
856 B
.CreateZExt(B
.CreateLoad(B
.getInt8Ty(), castToCStr(RHS
, B
), "rhsc"),
857 CI
->getType(), "rhsv");
858 return B
.CreateSub(LHSV
, RHSV
, "chardiff");
861 // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
862 // TODO: The case where both inputs are constants does not need to be limited
863 // to legal integers or equality comparison. See block below this.
864 if (DL
.isLegalInteger(Len
* 8) && isOnlyUsedInZeroEqualityComparison(CI
)) {
865 IntegerType
*IntType
= IntegerType::get(CI
->getContext(), Len
* 8);
866 unsigned PrefAlignment
= DL
.getPrefTypeAlignment(IntType
);
868 // First, see if we can fold either argument to a constant.
869 Value
*LHSV
= nullptr;
870 if (auto *LHSC
= dyn_cast
<Constant
>(LHS
)) {
871 LHSC
= ConstantExpr::getBitCast(LHSC
, IntType
->getPointerTo());
872 LHSV
= ConstantFoldLoadFromConstPtr(LHSC
, IntType
, DL
);
874 Value
*RHSV
= nullptr;
875 if (auto *RHSC
= dyn_cast
<Constant
>(RHS
)) {
876 RHSC
= ConstantExpr::getBitCast(RHSC
, IntType
->getPointerTo());
877 RHSV
= ConstantFoldLoadFromConstPtr(RHSC
, IntType
, DL
);
880 // Don't generate unaligned loads. If either source is constant data,
881 // alignment doesn't matter for that source because there is no load.
882 if ((LHSV
|| getKnownAlignment(LHS
, DL
, CI
) >= PrefAlignment
) &&
883 (RHSV
|| getKnownAlignment(RHS
, DL
, CI
) >= PrefAlignment
)) {
886 IntType
->getPointerTo(LHS
->getType()->getPointerAddressSpace());
887 LHSV
= B
.CreateLoad(IntType
, B
.CreateBitCast(LHS
, LHSPtrTy
), "lhsv");
891 IntType
->getPointerTo(RHS
->getType()->getPointerAddressSpace());
892 RHSV
= B
.CreateLoad(IntType
, B
.CreateBitCast(RHS
, RHSPtrTy
), "rhsv");
894 return B
.CreateZExt(B
.CreateICmpNE(LHSV
, RHSV
), CI
->getType(), "memcmp");
898 // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const).
899 // TODO: This is limited to i8 arrays.
900 StringRef LHSStr
, RHSStr
;
901 if (getConstantStringInfo(LHS
, LHSStr
) &&
902 getConstantStringInfo(RHS
, RHSStr
)) {
903 // Make sure we're not reading out-of-bounds memory.
904 if (Len
> LHSStr
.size() || Len
> RHSStr
.size())
906 // Fold the memcmp and normalize the result. This way we get consistent
907 // results across multiple platforms.
909 int Cmp
= memcmp(LHSStr
.data(), RHSStr
.data(), Len
);
914 return ConstantInt::get(CI
->getType(), Ret
);
919 // Most simplifications for memcmp also apply to bcmp.
920 Value
*LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst
*CI
,
922 Value
*LHS
= CI
->getArgOperand(0), *RHS
= CI
->getArgOperand(1);
923 Value
*Size
= CI
->getArgOperand(2);
925 if (LHS
== RHS
) // memcmp(s,s,x) -> 0
926 return Constant::getNullValue(CI
->getType());
928 // Handle constant lengths.
929 if (ConstantInt
*LenC
= dyn_cast
<ConstantInt
>(Size
))
930 if (Value
*Res
= optimizeMemCmpConstantSize(CI
, LHS
, RHS
,
931 LenC
->getZExtValue(), B
, DL
))
937 Value
*LibCallSimplifier::optimizeMemCmp(CallInst
*CI
, IRBuilder
<> &B
) {
938 if (Value
*V
= optimizeMemCmpBCmpCommon(CI
, B
))
941 // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
942 // bcmp can be more efficient than memcmp because it only has to know that
943 // there is a difference, not how different one is to the other.
944 if (TLI
->has(LibFunc_bcmp
) && isOnlyUsedInZeroEqualityComparison(CI
)) {
945 Value
*LHS
= CI
->getArgOperand(0);
946 Value
*RHS
= CI
->getArgOperand(1);
947 Value
*Size
= CI
->getArgOperand(2);
948 return emitBCmp(LHS
, RHS
, Size
, B
, DL
, TLI
);
954 Value
*LibCallSimplifier::optimizeBCmp(CallInst
*CI
, IRBuilder
<> &B
) {
955 return optimizeMemCmpBCmpCommon(CI
, B
);
958 Value
*LibCallSimplifier::optimizeMemCpy(CallInst
*CI
, IRBuilder
<> &B
) {
959 // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
960 B
.CreateMemCpy(CI
->getArgOperand(0), 1, CI
->getArgOperand(1), 1,
961 CI
->getArgOperand(2));
962 return CI
->getArgOperand(0);
965 Value
*LibCallSimplifier::optimizeMemMove(CallInst
*CI
, IRBuilder
<> &B
) {
966 // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
967 B
.CreateMemMove(CI
->getArgOperand(0), 1, CI
->getArgOperand(1), 1,
968 CI
->getArgOperand(2));
969 return CI
->getArgOperand(0);
972 /// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
973 Value
*LibCallSimplifier::foldMallocMemset(CallInst
*Memset
, IRBuilder
<> &B
) {
974 // This has to be a memset of zeros (bzero).
975 auto *FillValue
= dyn_cast
<ConstantInt
>(Memset
->getArgOperand(1));
976 if (!FillValue
|| FillValue
->getZExtValue() != 0)
979 // TODO: We should handle the case where the malloc has more than one use.
980 // This is necessary to optimize common patterns such as when the result of
981 // the malloc is checked against null or when a memset intrinsic is used in
982 // place of a memset library call.
983 auto *Malloc
= dyn_cast
<CallInst
>(Memset
->getArgOperand(0));
984 if (!Malloc
|| !Malloc
->hasOneUse())
987 // Is the inner call really malloc()?
988 Function
*InnerCallee
= Malloc
->getCalledFunction();
993 if (!TLI
->getLibFunc(*InnerCallee
, Func
) || !TLI
->has(Func
) ||
994 Func
!= LibFunc_malloc
)
997 // The memset must cover the same number of bytes that are malloc'd.
998 if (Memset
->getArgOperand(2) != Malloc
->getArgOperand(0))
1001 // Replace the malloc with a calloc. We need the data layout to know what the
1002 // actual size of a 'size_t' parameter is.
1003 B
.SetInsertPoint(Malloc
->getParent(), ++Malloc
->getIterator());
1004 const DataLayout
&DL
= Malloc
->getModule()->getDataLayout();
1005 IntegerType
*SizeType
= DL
.getIntPtrType(B
.GetInsertBlock()->getContext());
1006 Value
*Calloc
= emitCalloc(ConstantInt::get(SizeType
, 1),
1007 Malloc
->getArgOperand(0), Malloc
->getAttributes(),
1012 Malloc
->replaceAllUsesWith(Calloc
);
1013 eraseFromParent(Malloc
);
1018 Value
*LibCallSimplifier::optimizeMemSet(CallInst
*CI
, IRBuilder
<> &B
) {
1019 if (auto *Calloc
= foldMallocMemset(CI
, B
))
1022 // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
1023 Value
*Val
= B
.CreateIntCast(CI
->getArgOperand(1), B
.getInt8Ty(), false);
1024 B
.CreateMemSet(CI
->getArgOperand(0), Val
, CI
->getArgOperand(2), 1);
1025 return CI
->getArgOperand(0);
1028 Value
*LibCallSimplifier::optimizeRealloc(CallInst
*CI
, IRBuilder
<> &B
) {
1029 if (isa
<ConstantPointerNull
>(CI
->getArgOperand(0)))
1030 return emitMalloc(CI
->getArgOperand(1), B
, DL
, TLI
);
1035 //===----------------------------------------------------------------------===//
1036 // Math Library Optimizations
1037 //===----------------------------------------------------------------------===//
1039 // Replace a libcall \p CI with a call to intrinsic \p IID
1040 static Value
*replaceUnaryCall(CallInst
*CI
, IRBuilder
<> &B
, Intrinsic::ID IID
) {
1041 // Propagate fast-math flags from the existing call to the new call.
1042 IRBuilder
<>::FastMathFlagGuard
Guard(B
);
1043 B
.setFastMathFlags(CI
->getFastMathFlags());
1045 Module
*M
= CI
->getModule();
1046 Value
*V
= CI
->getArgOperand(0);
1047 Function
*F
= Intrinsic::getDeclaration(M
, IID
, CI
->getType());
1048 CallInst
*NewCall
= B
.CreateCall(F
, V
);
1049 NewCall
->takeName(CI
);
1053 /// Return a variant of Val with float type.
1054 /// Currently this works in two cases: If Val is an FPExtension of a float
1055 /// value to something bigger, simply return the operand.
1056 /// If Val is a ConstantFP but can be converted to a float ConstantFP without
1057 /// loss of precision do so.
1058 static Value
*valueHasFloatPrecision(Value
*Val
) {
1059 if (FPExtInst
*Cast
= dyn_cast
<FPExtInst
>(Val
)) {
1060 Value
*Op
= Cast
->getOperand(0);
1061 if (Op
->getType()->isFloatTy())
1064 if (ConstantFP
*Const
= dyn_cast
<ConstantFP
>(Val
)) {
1065 APFloat F
= Const
->getValueAPF();
1067 (void)F
.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven
,
1070 return ConstantFP::get(Const
->getContext(), F
);
1075 /// Shrink double -> float functions.
1076 static Value
*optimizeDoubleFP(CallInst
*CI
, IRBuilder
<> &B
,
1077 bool isBinary
, bool isPrecise
= false) {
1078 Function
*CalleeFn
= CI
->getCalledFunction();
1079 if (!CI
->getType()->isDoubleTy() || !CalleeFn
)
1082 // If not all the uses of the function are converted to float, then bail out.
1083 // This matters if the precision of the result is more important than the
1084 // precision of the arguments.
1086 for (User
*U
: CI
->users()) {
1087 FPTruncInst
*Cast
= dyn_cast
<FPTruncInst
>(U
);
1088 if (!Cast
|| !Cast
->getType()->isFloatTy())
1092 // If this is something like 'g((double) float)', convert to 'gf(float)'.
1094 V
[0] = valueHasFloatPrecision(CI
->getArgOperand(0));
1095 V
[1] = isBinary
? valueHasFloatPrecision(CI
->getArgOperand(1)) : nullptr;
1096 if (!V
[0] || (isBinary
&& !V
[1]))
1099 StringRef CalleeNm
= CalleeFn
->getName();
1100 AttributeList CalleeAt
= CalleeFn
->getAttributes();
1101 bool CalleeIn
= CalleeFn
->isIntrinsic();
1103 // If call isn't an intrinsic, check that it isn't within a function with the
1104 // same name as the float version of this call, otherwise the result is an
1105 // infinite loop. For example, from MinGW-w64:
1107 // float expf(float val) { return (float) exp((double) val); }
1109 const Function
*Fn
= CI
->getFunction();
1110 StringRef FnName
= Fn
->getName();
1111 if (FnName
.back() == 'f' &&
1112 FnName
.size() == (CalleeNm
.size() + 1) &&
1113 FnName
.startswith(CalleeNm
))
1117 // Propagate the math semantics from the current function to the new function.
1118 IRBuilder
<>::FastMathFlagGuard
Guard(B
);
1119 B
.setFastMathFlags(CI
->getFastMathFlags());
1121 // g((double) float) -> (double) gf(float)
1124 Module
*M
= CI
->getModule();
1125 Intrinsic::ID IID
= CalleeFn
->getIntrinsicID();
1126 Function
*Fn
= Intrinsic::getDeclaration(M
, IID
, B
.getFloatTy());
1127 R
= isBinary
? B
.CreateCall(Fn
, V
) : B
.CreateCall(Fn
, V
[0]);
1130 R
= isBinary
? emitBinaryFloatFnCall(V
[0], V
[1], CalleeNm
, B
, CalleeAt
)
1131 : emitUnaryFloatFnCall(V
[0], CalleeNm
, B
, CalleeAt
);
1133 return B
.CreateFPExt(R
, B
.getDoubleTy());
1136 /// Shrink double -> float for unary functions.
1137 static Value
*optimizeUnaryDoubleFP(CallInst
*CI
, IRBuilder
<> &B
,
1138 bool isPrecise
= false) {
1139 return optimizeDoubleFP(CI
, B
, false, isPrecise
);
1142 /// Shrink double -> float for binary functions.
1143 static Value
*optimizeBinaryDoubleFP(CallInst
*CI
, IRBuilder
<> &B
,
1144 bool isPrecise
= false) {
1145 return optimizeDoubleFP(CI
, B
, true, isPrecise
);
1148 // cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
1149 Value
*LibCallSimplifier::optimizeCAbs(CallInst
*CI
, IRBuilder
<> &B
) {
1153 // Propagate fast-math flags from the existing call to new instructions.
1154 IRBuilder
<>::FastMathFlagGuard
Guard(B
);
1155 B
.setFastMathFlags(CI
->getFastMathFlags());
1158 if (CI
->getNumArgOperands() == 1) {
1159 Value
*Op
= CI
->getArgOperand(0);
1160 assert(Op
->getType()->isArrayTy() && "Unexpected signature for cabs!");
1161 Real
= B
.CreateExtractValue(Op
, 0, "real");
1162 Imag
= B
.CreateExtractValue(Op
, 1, "imag");
1164 assert(CI
->getNumArgOperands() == 2 && "Unexpected signature for cabs!");
1165 Real
= CI
->getArgOperand(0);
1166 Imag
= CI
->getArgOperand(1);
1169 Value
*RealReal
= B
.CreateFMul(Real
, Real
);
1170 Value
*ImagImag
= B
.CreateFMul(Imag
, Imag
);
1172 Function
*FSqrt
= Intrinsic::getDeclaration(CI
->getModule(), Intrinsic::sqrt
,
1174 return B
.CreateCall(FSqrt
, B
.CreateFAdd(RealReal
, ImagImag
), "cabs");
1177 static Value
*optimizeTrigReflections(CallInst
*Call
, LibFunc Func
,
1179 if (!isa
<FPMathOperator
>(Call
))
1182 IRBuilder
<>::FastMathFlagGuard
Guard(B
);
1183 B
.setFastMathFlags(Call
->getFastMathFlags());
1185 // TODO: Can this be shared to also handle LLVM intrinsics?
1194 // sin(-X) --> -sin(X)
1195 // tan(-X) --> -tan(X)
1196 if (match(Call
->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X
)))))
1197 return B
.CreateFNeg(B
.CreateCall(Call
->getCalledFunction(), X
));
1202 // cos(-X) --> cos(X)
1203 if (match(Call
->getArgOperand(0), m_FNeg(m_Value(X
))))
1204 return B
.CreateCall(Call
->getCalledFunction(), X
, "cos");
1212 static Value
*getPow(Value
*InnerChain
[33], unsigned Exp
, IRBuilder
<> &B
) {
1213 // Multiplications calculated using Addition Chains.
1214 // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
1216 assert(Exp
!= 0 && "Incorrect exponent 0 not handled");
1218 if (InnerChain
[Exp
])
1219 return InnerChain
[Exp
];
1221 static const unsigned AddChain
[33][2] = {
1223 {0, 0}, // Unused (base case = pow1).
1224 {1, 1}, // Unused (pre-computed).
1225 {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4},
1226 {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7},
1227 {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10},
1228 {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
1229 {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
1232 InnerChain
[Exp
] = B
.CreateFMul(getPow(InnerChain
, AddChain
[Exp
][0], B
),
1233 getPow(InnerChain
, AddChain
[Exp
][1], B
));
1234 return InnerChain
[Exp
];
1237 /// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
1238 /// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
1239 /// exp2(log2(n) * x) for pow(n, x).
1240 Value
*LibCallSimplifier::replacePowWithExp(CallInst
*Pow
, IRBuilder
<> &B
) {
1241 Value
*Base
= Pow
->getArgOperand(0), *Expo
= Pow
->getArgOperand(1);
1242 AttributeList Attrs
= Pow
->getCalledFunction()->getAttributes();
1243 Module
*Mod
= Pow
->getModule();
1244 Type
*Ty
= Pow
->getType();
1247 // Evaluate special cases related to a nested function as the base.
1249 // pow(exp(x), y) -> exp(x * y)
1250 // pow(exp2(x), y) -> exp2(x * y)
1251 // If exp{,2}() is used only once, it is better to fold two transcendental
1252 // math functions into one. If used again, exp{,2}() would still have to be
1253 // called with the original argument, then keep both original transcendental
1254 // functions. However, this transformation is only safe with fully relaxed
1255 // math semantics, since, besides rounding differences, it changes overflow
1256 // and underflow behavior quite dramatically. For example:
1257 // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
1259 // exp(1000 * 0.001) = exp(1)
1260 // TODO: Loosen the requirement for fully relaxed math semantics.
1261 // TODO: Handle exp10() when more targets have it available.
1262 CallInst
*BaseFn
= dyn_cast
<CallInst
>(Base
);
1263 if (BaseFn
&& BaseFn
->hasOneUse() && BaseFn
->isFast() && Pow
->isFast()) {
1266 Function
*CalleeFn
= BaseFn
->getCalledFunction();
1268 TLI
->getLibFunc(CalleeFn
->getName(), LibFn
) && TLI
->has(LibFn
)) {
1273 LibFunc LibFnDouble
;
1274 LibFunc LibFnLongDouble
;
1279 case LibFunc_expf
: case LibFunc_exp
: case LibFunc_expl
:
1280 ExpName
= TLI
->getName(LibFunc_exp
);
1281 ID
= Intrinsic::exp
;
1282 LibFnFloat
= LibFunc_expf
;
1283 LibFnDouble
= LibFunc_exp
;
1284 LibFnLongDouble
= LibFunc_expl
;
1286 case LibFunc_exp2f
: case LibFunc_exp2
: case LibFunc_exp2l
:
1287 ExpName
= TLI
->getName(LibFunc_exp2
);
1288 ID
= Intrinsic::exp2
;
1289 LibFnFloat
= LibFunc_exp2f
;
1290 LibFnDouble
= LibFunc_exp2
;
1291 LibFnLongDouble
= LibFunc_exp2l
;
1295 // Create new exp{,2}() with the product as its argument.
1296 Value
*FMul
= B
.CreateFMul(BaseFn
->getArgOperand(0), Expo
, "mul");
1297 ExpFn
= BaseFn
->doesNotAccessMemory()
1298 ? B
.CreateCall(Intrinsic::getDeclaration(Mod
, ID
, Ty
),
1300 : emitUnaryFloatFnCall(FMul
, TLI
, LibFnDouble
, LibFnFloat
,
1302 BaseFn
->getAttributes());
1304 // Since the new exp{,2}() is different from the original one, dead code
1305 // elimination cannot be trusted to remove it, since it may have side
1306 // effects (e.g., errno). When the only consumer for the original
1307 // exp{,2}() is pow(), then it has to be explicitly erased.
1308 BaseFn
->replaceAllUsesWith(ExpFn
);
1309 eraseFromParent(BaseFn
);
1315 // Evaluate special cases related to a constant base.
1317 const APFloat
*BaseF
;
1318 if (!match(Pow
->getArgOperand(0), m_APFloat(BaseF
)))
1321 // pow(2.0 ** n, x) -> exp2(n * x)
1322 if (hasUnaryFloatFn(TLI
, Ty
, LibFunc_exp2
, LibFunc_exp2f
, LibFunc_exp2l
)) {
1323 APFloat BaseR
= APFloat(1.0);
1324 BaseR
.convert(BaseF
->getSemantics(), APFloat::rmTowardZero
, &Ignored
);
1325 BaseR
= BaseR
/ *BaseF
;
1326 bool IsInteger
= BaseF
->isInteger(), IsReciprocal
= BaseR
.isInteger();
1327 const APFloat
*NF
= IsReciprocal
? &BaseR
: BaseF
;
1328 APSInt
NI(64, false);
1329 if ((IsInteger
|| IsReciprocal
) &&
1330 NF
->convertToInteger(NI
, APFloat::rmTowardZero
, &Ignored
) ==
1332 NI
> 1 && NI
.isPowerOf2()) {
1333 double N
= NI
.logBase2() * (IsReciprocal
? -1.0 : 1.0);
1334 Value
*FMul
= B
.CreateFMul(Expo
, ConstantFP::get(Ty
, N
), "mul");
1335 if (Pow
->doesNotAccessMemory())
1336 return B
.CreateCall(Intrinsic::getDeclaration(Mod
, Intrinsic::exp2
, Ty
),
1339 return emitUnaryFloatFnCall(FMul
, TLI
, LibFunc_exp2
, LibFunc_exp2f
,
1340 LibFunc_exp2l
, B
, Attrs
);
1344 // pow(10.0, x) -> exp10(x)
1345 // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
1346 if (match(Base
, m_SpecificFP(10.0)) &&
1347 hasUnaryFloatFn(TLI
, Ty
, LibFunc_exp10
, LibFunc_exp10f
, LibFunc_exp10l
))
1348 return emitUnaryFloatFnCall(Expo
, TLI
, LibFunc_exp10
, LibFunc_exp10f
,
1349 LibFunc_exp10l
, B
, Attrs
);
1351 // pow(n, x) -> exp2(log2(n) * x)
1352 if (Pow
->hasOneUse() && Pow
->hasApproxFunc() && Pow
->hasNoNaNs() &&
1353 Pow
->hasNoInfs() && BaseF
->isNormal() && !BaseF
->isNegative()) {
1354 Value
*Log
= nullptr;
1355 if (Ty
->isFloatTy())
1356 Log
= ConstantFP::get(Ty
, std::log2(BaseF
->convertToFloat()));
1357 else if (Ty
->isDoubleTy())
1358 Log
= ConstantFP::get(Ty
, std::log2(BaseF
->convertToDouble()));
1361 Value
*FMul
= B
.CreateFMul(Log
, Expo
, "mul");
1362 if (Pow
->doesNotAccessMemory()) {
1363 return B
.CreateCall(Intrinsic::getDeclaration(Mod
, Intrinsic::exp2
, Ty
),
1366 if (hasUnaryFloatFn(TLI
, Ty
, LibFunc_exp2
, LibFunc_exp2f
,
1368 return emitUnaryFloatFnCall(FMul
, TLI
, LibFunc_exp2
, LibFunc_exp2f
,
1369 LibFunc_exp2l
, B
, Attrs
);
1376 static Value
*getSqrtCall(Value
*V
, AttributeList Attrs
, bool NoErrno
,
1377 Module
*M
, IRBuilder
<> &B
,
1378 const TargetLibraryInfo
*TLI
) {
1379 // If errno is never set, then use the intrinsic for sqrt().
1382 Intrinsic::getDeclaration(M
, Intrinsic::sqrt
, V
->getType());
1383 return B
.CreateCall(SqrtFn
, V
, "sqrt");
1386 // Otherwise, use the libcall for sqrt().
1387 if (hasUnaryFloatFn(TLI
, V
->getType(), LibFunc_sqrt
, LibFunc_sqrtf
,
1389 // TODO: We also should check that the target can in fact lower the sqrt()
1390 // libcall. We currently have no way to ask this question, so we ask if
1391 // the target has a sqrt() libcall, which is not exactly the same.
1392 return emitUnaryFloatFnCall(V
, TLI
, LibFunc_sqrt
, LibFunc_sqrtf
,
1393 LibFunc_sqrtl
, B
, Attrs
);
1398 /// Use square root in place of pow(x, +/-0.5).
1399 Value
*LibCallSimplifier::replacePowWithSqrt(CallInst
*Pow
, IRBuilder
<> &B
) {
1400 Value
*Sqrt
, *Base
= Pow
->getArgOperand(0), *Expo
= Pow
->getArgOperand(1);
1401 AttributeList Attrs
= Pow
->getCalledFunction()->getAttributes();
1402 Module
*Mod
= Pow
->getModule();
1403 Type
*Ty
= Pow
->getType();
1405 const APFloat
*ExpoF
;
1406 if (!match(Expo
, m_APFloat(ExpoF
)) ||
1407 (!ExpoF
->isExactlyValue(0.5) && !ExpoF
->isExactlyValue(-0.5)))
1410 Sqrt
= getSqrtCall(Base
, Attrs
, Pow
->doesNotAccessMemory(), Mod
, B
, TLI
);
1414 // Handle signed zero base by expanding to fabs(sqrt(x)).
1415 if (!Pow
->hasNoSignedZeros()) {
1416 Function
*FAbsFn
= Intrinsic::getDeclaration(Mod
, Intrinsic::fabs
, Ty
);
1417 Sqrt
= B
.CreateCall(FAbsFn
, Sqrt
, "abs");
1420 // Handle non finite base by expanding to
1421 // (x == -infinity ? +infinity : sqrt(x)).
1422 if (!Pow
->hasNoInfs()) {
1423 Value
*PosInf
= ConstantFP::getInfinity(Ty
),
1424 *NegInf
= ConstantFP::getInfinity(Ty
, true);
1425 Value
*FCmp
= B
.CreateFCmpOEQ(Base
, NegInf
, "isinf");
1426 Sqrt
= B
.CreateSelect(FCmp
, PosInf
, Sqrt
);
1429 // If the exponent is negative, then get the reciprocal.
1430 if (ExpoF
->isNegative())
1431 Sqrt
= B
.CreateFDiv(ConstantFP::get(Ty
, 1.0), Sqrt
, "reciprocal");
1436 static Value
*createPowWithIntegerExponent(Value
*Base
, Value
*Expo
, Module
*M
,
1438 Value
*Args
[] = {Base
, Expo
};
1439 Function
*F
= Intrinsic::getDeclaration(M
, Intrinsic::powi
, Base
->getType());
1440 return B
.CreateCall(F
, Args
);
1443 Value
*LibCallSimplifier::optimizePow(CallInst
*Pow
, IRBuilder
<> &B
) {
1444 Value
*Base
= Pow
->getArgOperand(0);
1445 Value
*Expo
= Pow
->getArgOperand(1);
1446 Function
*Callee
= Pow
->getCalledFunction();
1447 StringRef Name
= Callee
->getName();
1448 Type
*Ty
= Pow
->getType();
1449 Module
*M
= Pow
->getModule();
1450 Value
*Shrunk
= nullptr;
1451 bool AllowApprox
= Pow
->hasApproxFunc();
1454 // Bail out if simplifying libcalls to pow() is disabled.
1455 if (!hasUnaryFloatFn(TLI
, Ty
, LibFunc_pow
, LibFunc_powf
, LibFunc_powl
))
1458 // Propagate the math semantics from the call to any created instructions.
1459 IRBuilder
<>::FastMathFlagGuard
Guard(B
);
1460 B
.setFastMathFlags(Pow
->getFastMathFlags());
1462 // Shrink pow() to powf() if the arguments are single precision,
1463 // unless the result is expected to be double precision.
1464 if (UnsafeFPShrink
&& Name
== TLI
->getName(LibFunc_pow
) &&
1465 hasFloatVersion(Name
))
1466 Shrunk
= optimizeBinaryDoubleFP(Pow
, B
, true);
1468 // Evaluate special cases related to the base.
1470 // pow(1.0, x) -> 1.0
1471 if (match(Base
, m_FPOne()))
1474 if (Value
*Exp
= replacePowWithExp(Pow
, B
))
1477 // Evaluate special cases related to the exponent.
1479 // pow(x, -1.0) -> 1.0 / x
1480 if (match(Expo
, m_SpecificFP(-1.0)))
1481 return B
.CreateFDiv(ConstantFP::get(Ty
, 1.0), Base
, "reciprocal");
1483 // pow(x, 0.0) -> 1.0
1484 if (match(Expo
, m_SpecificFP(0.0)))
1485 return ConstantFP::get(Ty
, 1.0);
1488 if (match(Expo
, m_FPOne()))
1491 // pow(x, 2.0) -> x * x
1492 if (match(Expo
, m_SpecificFP(2.0)))
1493 return B
.CreateFMul(Base
, Base
, "square");
1495 if (Value
*Sqrt
= replacePowWithSqrt(Pow
, B
))
1498 // pow(x, n) -> x * x * x * ...
1499 const APFloat
*ExpoF
;
1500 if (AllowApprox
&& match(Expo
, m_APFloat(ExpoF
))) {
1501 // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
1502 // If the exponent is an integer+0.5 we generate a call to sqrt and an
1504 // TODO: This whole transformation should be backend specific (e.g. some
1505 // backends might prefer libcalls or the limit for the exponent might
1506 // be different) and it should also consider optimizing for size.
1507 APFloat
LimF(ExpoF
->getSemantics(), 33.0),
1509 if (ExpoA
.compare(LimF
) == APFloat::cmpLessThan
) {
1510 // This transformation applies to integer or integer+0.5 exponents only.
1511 // For integer+0.5, we create a sqrt(Base) call.
1512 Value
*Sqrt
= nullptr;
1513 if (!ExpoA
.isInteger()) {
1514 APFloat Expo2
= ExpoA
;
1515 // To check if ExpoA is an integer + 0.5, we add it to itself. If there
1516 // is no floating point exception and the result is an integer, then
1517 // ExpoA == integer + 0.5
1518 if (Expo2
.add(ExpoA
, APFloat::rmNearestTiesToEven
) != APFloat::opOK
)
1521 if (!Expo2
.isInteger())
1524 Sqrt
= getSqrtCall(Base
, Pow
->getCalledFunction()->getAttributes(),
1525 Pow
->doesNotAccessMemory(), M
, B
, TLI
);
1528 // We will memoize intermediate products of the Addition Chain.
1529 Value
*InnerChain
[33] = {nullptr};
1530 InnerChain
[1] = Base
;
1531 InnerChain
[2] = B
.CreateFMul(Base
, Base
, "square");
1533 // We cannot readily convert a non-double type (like float) to a double.
1534 // So we first convert it to something which could be converted to double.
1535 ExpoA
.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero
, &Ignored
);
1536 Value
*FMul
= getPow(InnerChain
, ExpoA
.convertToDouble(), B
);
1538 // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
1540 FMul
= B
.CreateFMul(FMul
, Sqrt
);
1542 // If the exponent is negative, then get the reciprocal.
1543 if (ExpoF
->isNegative())
1544 FMul
= B
.CreateFDiv(ConstantFP::get(Ty
, 1.0), FMul
, "reciprocal");
1549 APSInt
IntExpo(32, /*isUnsigned=*/false);
1550 // powf(x, n) -> powi(x, n) if n is a constant signed integer value
1551 if (ExpoF
->isInteger() &&
1552 ExpoF
->convertToInteger(IntExpo
, APFloat::rmTowardZero
, &Ignored
) ==
1554 return createPowWithIntegerExponent(
1555 Base
, ConstantInt::get(B
.getInt32Ty(), IntExpo
), M
, B
);
1559 // powf(x, itofp(y)) -> powi(x, y)
1560 if (AllowApprox
&& (isa
<SIToFPInst
>(Expo
) || isa
<UIToFPInst
>(Expo
))) {
1561 Value
*IntExpo
= cast
<Instruction
>(Expo
)->getOperand(0);
1562 Value
*NewExpo
= nullptr;
1563 unsigned BitWidth
= IntExpo
->getType()->getPrimitiveSizeInBits();
1564 if (isa
<SIToFPInst
>(Expo
) && BitWidth
== 32)
1566 else if (BitWidth
< 32)
1567 NewExpo
= isa
<SIToFPInst
>(Expo
) ? B
.CreateSExt(IntExpo
, B
.getInt32Ty())
1568 : B
.CreateZExt(IntExpo
, B
.getInt32Ty());
1570 return createPowWithIntegerExponent(Base
, NewExpo
, M
, B
);
1576 Value
*LibCallSimplifier::optimizeExp2(CallInst
*CI
, IRBuilder
<> &B
) {
1577 Function
*Callee
= CI
->getCalledFunction();
1578 Value
*Ret
= nullptr;
1579 StringRef Name
= Callee
->getName();
1580 if (UnsafeFPShrink
&& Name
== "exp2" && hasFloatVersion(Name
))
1581 Ret
= optimizeUnaryDoubleFP(CI
, B
, true);
1583 Value
*Op
= CI
->getArgOperand(0);
1584 // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
1585 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
1586 LibFunc LdExp
= LibFunc_ldexpl
;
1587 if (Op
->getType()->isFloatTy())
1588 LdExp
= LibFunc_ldexpf
;
1589 else if (Op
->getType()->isDoubleTy())
1590 LdExp
= LibFunc_ldexp
;
1592 if (TLI
->has(LdExp
)) {
1593 Value
*LdExpArg
= nullptr;
1594 if (SIToFPInst
*OpC
= dyn_cast
<SIToFPInst
>(Op
)) {
1595 if (OpC
->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
1596 LdExpArg
= B
.CreateSExt(OpC
->getOperand(0), B
.getInt32Ty());
1597 } else if (UIToFPInst
*OpC
= dyn_cast
<UIToFPInst
>(Op
)) {
1598 if (OpC
->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
1599 LdExpArg
= B
.CreateZExt(OpC
->getOperand(0), B
.getInt32Ty());
1603 Constant
*One
= ConstantFP::get(CI
->getContext(), APFloat(1.0f
));
1604 if (!Op
->getType()->isFloatTy())
1605 One
= ConstantExpr::getFPExtend(One
, Op
->getType());
1607 Module
*M
= CI
->getModule();
1608 FunctionCallee NewCallee
= M
->getOrInsertFunction(
1609 TLI
->getName(LdExp
), Op
->getType(), Op
->getType(), B
.getInt32Ty());
1610 CallInst
*CI
= B
.CreateCall(NewCallee
, {One
, LdExpArg
});
1611 if (const Function
*F
= dyn_cast
<Function
>(Callee
->stripPointerCasts()))
1612 CI
->setCallingConv(F
->getCallingConv());
1620 Value
*LibCallSimplifier::optimizeFMinFMax(CallInst
*CI
, IRBuilder
<> &B
) {
1621 // If we can shrink the call to a float function rather than a double
1622 // function, do that first.
1623 Function
*Callee
= CI
->getCalledFunction();
1624 StringRef Name
= Callee
->getName();
1625 if ((Name
== "fmin" || Name
== "fmax") && hasFloatVersion(Name
))
1626 if (Value
*Ret
= optimizeBinaryDoubleFP(CI
, B
))
1629 // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
1630 // the intrinsics for improved optimization (for example, vectorization).
1631 // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
1632 // From the C standard draft WG14/N1256:
1633 // "Ideally, fmax would be sensitive to the sign of zero, for example
1634 // fmax(-0.0, +0.0) would return +0; however, implementation in software
1635 // might be impractical."
1636 IRBuilder
<>::FastMathFlagGuard
Guard(B
);
1637 FastMathFlags FMF
= CI
->getFastMathFlags();
1638 FMF
.setNoSignedZeros();
1639 B
.setFastMathFlags(FMF
);
1641 Intrinsic::ID IID
= Callee
->getName().startswith("fmin") ? Intrinsic::minnum
1642 : Intrinsic::maxnum
;
1643 Function
*F
= Intrinsic::getDeclaration(CI
->getModule(), IID
, CI
->getType());
1644 return B
.CreateCall(F
, { CI
->getArgOperand(0), CI
->getArgOperand(1) });
1647 Value
*LibCallSimplifier::optimizeLog(CallInst
*CI
, IRBuilder
<> &B
) {
1648 Function
*Callee
= CI
->getCalledFunction();
1649 Value
*Ret
= nullptr;
1650 StringRef Name
= Callee
->getName();
1651 if (UnsafeFPShrink
&& hasFloatVersion(Name
))
1652 Ret
= optimizeUnaryDoubleFP(CI
, B
, true);
1656 Value
*Op1
= CI
->getArgOperand(0);
1657 auto *OpC
= dyn_cast
<CallInst
>(Op1
);
1659 // The earlier call must also be 'fast' in order to do these transforms.
1660 if (!OpC
|| !OpC
->isFast())
1663 // log(pow(x,y)) -> y*log(x)
1664 // This is only applicable to log, log2, log10.
1665 if (Name
!= "log" && Name
!= "log2" && Name
!= "log10")
1668 IRBuilder
<>::FastMathFlagGuard
Guard(B
);
1671 B
.setFastMathFlags(FMF
);
1674 Function
*F
= OpC
->getCalledFunction();
1675 if (F
&& ((TLI
->getLibFunc(F
->getName(), Func
) && TLI
->has(Func
) &&
1676 Func
== LibFunc_pow
) || F
->getIntrinsicID() == Intrinsic::pow
))
1677 return B
.CreateFMul(OpC
->getArgOperand(1),
1678 emitUnaryFloatFnCall(OpC
->getOperand(0), Callee
->getName(), B
,
1679 Callee
->getAttributes()), "mul");
1681 // log(exp2(y)) -> y*log(2)
1682 if (F
&& Name
== "log" && TLI
->getLibFunc(F
->getName(), Func
) &&
1683 TLI
->has(Func
) && Func
== LibFunc_exp2
)
1684 return B
.CreateFMul(
1685 OpC
->getArgOperand(0),
1686 emitUnaryFloatFnCall(ConstantFP::get(CI
->getType(), 2.0),
1687 Callee
->getName(), B
, Callee
->getAttributes()),
1692 Value
*LibCallSimplifier::optimizeSqrt(CallInst
*CI
, IRBuilder
<> &B
) {
1693 Function
*Callee
= CI
->getCalledFunction();
1694 Value
*Ret
= nullptr;
1695 // TODO: Once we have a way (other than checking for the existince of the
1696 // libcall) to tell whether our target can lower @llvm.sqrt, relax the
1698 if (TLI
->has(LibFunc_sqrtf
) && (Callee
->getName() == "sqrt" ||
1699 Callee
->getIntrinsicID() == Intrinsic::sqrt
))
1700 Ret
= optimizeUnaryDoubleFP(CI
, B
, true);
1705 Instruction
*I
= dyn_cast
<Instruction
>(CI
->getArgOperand(0));
1706 if (!I
|| I
->getOpcode() != Instruction::FMul
|| !I
->isFast())
1709 // We're looking for a repeated factor in a multiplication tree,
1710 // so we can do this fold: sqrt(x * x) -> fabs(x);
1711 // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
1712 Value
*Op0
= I
->getOperand(0);
1713 Value
*Op1
= I
->getOperand(1);
1714 Value
*RepeatOp
= nullptr;
1715 Value
*OtherOp
= nullptr;
1717 // Simple match: the operands of the multiply are identical.
1720 // Look for a more complicated pattern: one of the operands is itself
1721 // a multiply, so search for a common factor in that multiply.
1722 // Note: We don't bother looking any deeper than this first level or for
1723 // variations of this pattern because instcombine's visitFMUL and/or the
1724 // reassociation pass should give us this form.
1725 Value
*OtherMul0
, *OtherMul1
;
1726 if (match(Op0
, m_FMul(m_Value(OtherMul0
), m_Value(OtherMul1
)))) {
1727 // Pattern: sqrt((x * y) * z)
1728 if (OtherMul0
== OtherMul1
&& cast
<Instruction
>(Op0
)->isFast()) {
1729 // Matched: sqrt((x * x) * z)
1730 RepeatOp
= OtherMul0
;
1738 // Fast math flags for any created instructions should match the sqrt
1740 IRBuilder
<>::FastMathFlagGuard
Guard(B
);
1741 B
.setFastMathFlags(I
->getFastMathFlags());
1743 // If we found a repeated factor, hoist it out of the square root and
1744 // replace it with the fabs of that factor.
1745 Module
*M
= Callee
->getParent();
1746 Type
*ArgType
= I
->getType();
1747 Function
*Fabs
= Intrinsic::getDeclaration(M
, Intrinsic::fabs
, ArgType
);
1748 Value
*FabsCall
= B
.CreateCall(Fabs
, RepeatOp
, "fabs");
1750 // If we found a non-repeated factor, we still need to get its square
1751 // root. We then multiply that by the value that was simplified out
1752 // of the square root calculation.
1753 Function
*Sqrt
= Intrinsic::getDeclaration(M
, Intrinsic::sqrt
, ArgType
);
1754 Value
*SqrtCall
= B
.CreateCall(Sqrt
, OtherOp
, "sqrt");
1755 return B
.CreateFMul(FabsCall
, SqrtCall
);
1760 // TODO: Generalize to handle any trig function and its inverse.
1761 Value
*LibCallSimplifier::optimizeTan(CallInst
*CI
, IRBuilder
<> &B
) {
1762 Function
*Callee
= CI
->getCalledFunction();
1763 Value
*Ret
= nullptr;
1764 StringRef Name
= Callee
->getName();
1765 if (UnsafeFPShrink
&& Name
== "tan" && hasFloatVersion(Name
))
1766 Ret
= optimizeUnaryDoubleFP(CI
, B
, true);
1768 Value
*Op1
= CI
->getArgOperand(0);
1769 auto *OpC
= dyn_cast
<CallInst
>(Op1
);
1773 // Both calls must be 'fast' in order to remove them.
1774 if (!CI
->isFast() || !OpC
->isFast())
1777 // tan(atan(x)) -> x
1778 // tanf(atanf(x)) -> x
1779 // tanl(atanl(x)) -> x
1781 Function
*F
= OpC
->getCalledFunction();
1782 if (F
&& TLI
->getLibFunc(F
->getName(), Func
) && TLI
->has(Func
) &&
1783 ((Func
== LibFunc_atan
&& Callee
->getName() == "tan") ||
1784 (Func
== LibFunc_atanf
&& Callee
->getName() == "tanf") ||
1785 (Func
== LibFunc_atanl
&& Callee
->getName() == "tanl")))
1786 Ret
= OpC
->getArgOperand(0);
1790 static bool isTrigLibCall(CallInst
*CI
) {
1791 // We can only hope to do anything useful if we can ignore things like errno
1792 // and floating-point exceptions.
1793 // We already checked the prototype.
1794 return CI
->hasFnAttr(Attribute::NoUnwind
) &&
1795 CI
->hasFnAttr(Attribute::ReadNone
);
1798 static void insertSinCosCall(IRBuilder
<> &B
, Function
*OrigCallee
, Value
*Arg
,
1799 bool UseFloat
, Value
*&Sin
, Value
*&Cos
,
1801 Type
*ArgTy
= Arg
->getType();
1805 Triple
T(OrigCallee
->getParent()->getTargetTriple());
1807 Name
= "__sincospif_stret";
1809 assert(T
.getArch() != Triple::x86
&& "x86 messy and unsupported for now");
1810 // x86_64 can't use {float, float} since that would be returned in both
1811 // xmm0 and xmm1, which isn't what a real struct would do.
1812 ResTy
= T
.getArch() == Triple::x86_64
1813 ? static_cast<Type
*>(VectorType::get(ArgTy
, 2))
1814 : static_cast<Type
*>(StructType::get(ArgTy
, ArgTy
));
1816 Name
= "__sincospi_stret";
1817 ResTy
= StructType::get(ArgTy
, ArgTy
);
1820 Module
*M
= OrigCallee
->getParent();
1821 FunctionCallee Callee
=
1822 M
->getOrInsertFunction(Name
, OrigCallee
->getAttributes(), ResTy
, ArgTy
);
1824 if (Instruction
*ArgInst
= dyn_cast
<Instruction
>(Arg
)) {
1825 // If the argument is an instruction, it must dominate all uses so put our
1826 // sincos call there.
1827 B
.SetInsertPoint(ArgInst
->getParent(), ++ArgInst
->getIterator());
1829 // Otherwise (e.g. for a constant) the beginning of the function is as
1830 // good a place as any.
1831 BasicBlock
&EntryBB
= B
.GetInsertBlock()->getParent()->getEntryBlock();
1832 B
.SetInsertPoint(&EntryBB
, EntryBB
.begin());
1835 SinCos
= B
.CreateCall(Callee
, Arg
, "sincospi");
1837 if (SinCos
->getType()->isStructTy()) {
1838 Sin
= B
.CreateExtractValue(SinCos
, 0, "sinpi");
1839 Cos
= B
.CreateExtractValue(SinCos
, 1, "cospi");
1841 Sin
= B
.CreateExtractElement(SinCos
, ConstantInt::get(B
.getInt32Ty(), 0),
1843 Cos
= B
.CreateExtractElement(SinCos
, ConstantInt::get(B
.getInt32Ty(), 1),
1848 Value
*LibCallSimplifier::optimizeSinCosPi(CallInst
*CI
, IRBuilder
<> &B
) {
1849 // Make sure the prototype is as expected, otherwise the rest of the
1850 // function is probably invalid and likely to abort.
1851 if (!isTrigLibCall(CI
))
1854 Value
*Arg
= CI
->getArgOperand(0);
1855 SmallVector
<CallInst
*, 1> SinCalls
;
1856 SmallVector
<CallInst
*, 1> CosCalls
;
1857 SmallVector
<CallInst
*, 1> SinCosCalls
;
1859 bool IsFloat
= Arg
->getType()->isFloatTy();
1861 // Look for all compatible sinpi, cospi and sincospi calls with the same
1862 // argument. If there are enough (in some sense) we can make the
1864 Function
*F
= CI
->getFunction();
1865 for (User
*U
: Arg
->users())
1866 classifyArgUse(U
, F
, IsFloat
, SinCalls
, CosCalls
, SinCosCalls
);
1868 // It's only worthwhile if both sinpi and cospi are actually used.
1869 if (SinCosCalls
.empty() && (SinCalls
.empty() || CosCalls
.empty()))
1872 Value
*Sin
, *Cos
, *SinCos
;
1873 insertSinCosCall(B
, CI
->getCalledFunction(), Arg
, IsFloat
, Sin
, Cos
, SinCos
);
1875 auto replaceTrigInsts
= [this](SmallVectorImpl
<CallInst
*> &Calls
,
1877 for (CallInst
*C
: Calls
)
1878 replaceAllUsesWith(C
, Res
);
1881 replaceTrigInsts(SinCalls
, Sin
);
1882 replaceTrigInsts(CosCalls
, Cos
);
1883 replaceTrigInsts(SinCosCalls
, SinCos
);
1888 void LibCallSimplifier::classifyArgUse(
1889 Value
*Val
, Function
*F
, bool IsFloat
,
1890 SmallVectorImpl
<CallInst
*> &SinCalls
,
1891 SmallVectorImpl
<CallInst
*> &CosCalls
,
1892 SmallVectorImpl
<CallInst
*> &SinCosCalls
) {
1893 CallInst
*CI
= dyn_cast
<CallInst
>(Val
);
1898 // Don't consider calls in other functions.
1899 if (CI
->getFunction() != F
)
1902 Function
*Callee
= CI
->getCalledFunction();
1904 if (!Callee
|| !TLI
->getLibFunc(*Callee
, Func
) || !TLI
->has(Func
) ||
1909 if (Func
== LibFunc_sinpif
)
1910 SinCalls
.push_back(CI
);
1911 else if (Func
== LibFunc_cospif
)
1912 CosCalls
.push_back(CI
);
1913 else if (Func
== LibFunc_sincospif_stret
)
1914 SinCosCalls
.push_back(CI
);
1916 if (Func
== LibFunc_sinpi
)
1917 SinCalls
.push_back(CI
);
1918 else if (Func
== LibFunc_cospi
)
1919 CosCalls
.push_back(CI
);
1920 else if (Func
== LibFunc_sincospi_stret
)
1921 SinCosCalls
.push_back(CI
);
1925 //===----------------------------------------------------------------------===//
1926 // Integer Library Call Optimizations
1927 //===----------------------------------------------------------------------===//
1929 Value
*LibCallSimplifier::optimizeFFS(CallInst
*CI
, IRBuilder
<> &B
) {
1930 // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
1931 Value
*Op
= CI
->getArgOperand(0);
1932 Type
*ArgType
= Op
->getType();
1933 Function
*F
= Intrinsic::getDeclaration(CI
->getCalledFunction()->getParent(),
1934 Intrinsic::cttz
, ArgType
);
1935 Value
*V
= B
.CreateCall(F
, {Op
, B
.getTrue()}, "cttz");
1936 V
= B
.CreateAdd(V
, ConstantInt::get(V
->getType(), 1));
1937 V
= B
.CreateIntCast(V
, B
.getInt32Ty(), false);
1939 Value
*Cond
= B
.CreateICmpNE(Op
, Constant::getNullValue(ArgType
));
1940 return B
.CreateSelect(Cond
, V
, B
.getInt32(0));
1943 Value
*LibCallSimplifier::optimizeFls(CallInst
*CI
, IRBuilder
<> &B
) {
1944 // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
1945 Value
*Op
= CI
->getArgOperand(0);
1946 Type
*ArgType
= Op
->getType();
1947 Function
*F
= Intrinsic::getDeclaration(CI
->getCalledFunction()->getParent(),
1948 Intrinsic::ctlz
, ArgType
);
1949 Value
*V
= B
.CreateCall(F
, {Op
, B
.getFalse()}, "ctlz");
1950 V
= B
.CreateSub(ConstantInt::get(V
->getType(), ArgType
->getIntegerBitWidth()),
1952 return B
.CreateIntCast(V
, CI
->getType(), false);
1955 Value
*LibCallSimplifier::optimizeAbs(CallInst
*CI
, IRBuilder
<> &B
) {
1956 // abs(x) -> x <s 0 ? -x : x
1957 // The negation has 'nsw' because abs of INT_MIN is undefined.
1958 Value
*X
= CI
->getArgOperand(0);
1959 Value
*IsNeg
= B
.CreateICmpSLT(X
, Constant::getNullValue(X
->getType()));
1960 Value
*NegX
= B
.CreateNSWNeg(X
, "neg");
1961 return B
.CreateSelect(IsNeg
, NegX
, X
);
1964 Value
*LibCallSimplifier::optimizeIsDigit(CallInst
*CI
, IRBuilder
<> &B
) {
1965 // isdigit(c) -> (c-'0') <u 10
1966 Value
*Op
= CI
->getArgOperand(0);
1967 Op
= B
.CreateSub(Op
, B
.getInt32('0'), "isdigittmp");
1968 Op
= B
.CreateICmpULT(Op
, B
.getInt32(10), "isdigit");
1969 return B
.CreateZExt(Op
, CI
->getType());
1972 Value
*LibCallSimplifier::optimizeIsAscii(CallInst
*CI
, IRBuilder
<> &B
) {
1973 // isascii(c) -> c <u 128
1974 Value
*Op
= CI
->getArgOperand(0);
1975 Op
= B
.CreateICmpULT(Op
, B
.getInt32(128), "isascii");
1976 return B
.CreateZExt(Op
, CI
->getType());
1979 Value
*LibCallSimplifier::optimizeToAscii(CallInst
*CI
, IRBuilder
<> &B
) {
1980 // toascii(c) -> c & 0x7f
1981 return B
.CreateAnd(CI
->getArgOperand(0),
1982 ConstantInt::get(CI
->getType(), 0x7F));
1985 Value
*LibCallSimplifier::optimizeAtoi(CallInst
*CI
, IRBuilder
<> &B
) {
1987 if (!getConstantStringInfo(CI
->getArgOperand(0), Str
))
1990 return convertStrToNumber(CI
, Str
, 10);
1993 Value
*LibCallSimplifier::optimizeStrtol(CallInst
*CI
, IRBuilder
<> &B
) {
1995 if (!getConstantStringInfo(CI
->getArgOperand(0), Str
))
1998 if (!isa
<ConstantPointerNull
>(CI
->getArgOperand(1)))
2001 if (ConstantInt
*CInt
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(2))) {
2002 return convertStrToNumber(CI
, Str
, CInt
->getSExtValue());
2008 //===----------------------------------------------------------------------===//
2009 // Formatting and IO Library Call Optimizations
2010 //===----------------------------------------------------------------------===//
2012 static bool isReportingError(Function
*Callee
, CallInst
*CI
, int StreamArg
);
2014 Value
*LibCallSimplifier::optimizeErrorReporting(CallInst
*CI
, IRBuilder
<> &B
,
2016 Function
*Callee
= CI
->getCalledFunction();
2017 // Error reporting calls should be cold, mark them as such.
2018 // This applies even to non-builtin calls: it is only a hint and applies to
2019 // functions that the frontend might not understand as builtins.
2021 // This heuristic was suggested in:
2022 // Improving Static Branch Prediction in a Compiler
2023 // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
2024 // Proceedings of PACT'98, Oct. 1998, IEEE
2025 if (!CI
->hasFnAttr(Attribute::Cold
) &&
2026 isReportingError(Callee
, CI
, StreamArg
)) {
2027 CI
->addAttribute(AttributeList::FunctionIndex
, Attribute::Cold
);
2033 static bool isReportingError(Function
*Callee
, CallInst
*CI
, int StreamArg
) {
2034 if (!Callee
|| !Callee
->isDeclaration())
2040 // These functions might be considered cold, but only if their stream
2041 // argument is stderr.
2043 if (StreamArg
>= (int)CI
->getNumArgOperands())
2045 LoadInst
*LI
= dyn_cast
<LoadInst
>(CI
->getArgOperand(StreamArg
));
2048 GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(LI
->getPointerOperand());
2049 if (!GV
|| !GV
->isDeclaration())
2051 return GV
->getName() == "stderr";
2054 Value
*LibCallSimplifier::optimizePrintFString(CallInst
*CI
, IRBuilder
<> &B
) {
2055 // Check for a fixed format string.
2056 StringRef FormatStr
;
2057 if (!getConstantStringInfo(CI
->getArgOperand(0), FormatStr
))
2060 // Empty format string -> noop.
2061 if (FormatStr
.empty()) // Tolerate printf's declared void.
2062 return CI
->use_empty() ? (Value
*)CI
: ConstantInt::get(CI
->getType(), 0);
2064 // Do not do any of the following transformations if the printf return value
2065 // is used, in general the printf return value is not compatible with either
2066 // putchar() or puts().
2067 if (!CI
->use_empty())
2070 // printf("x") -> putchar('x'), even for "%" and "%%".
2071 if (FormatStr
.size() == 1 || FormatStr
== "%%")
2072 return emitPutChar(B
.getInt32(FormatStr
[0]), B
, TLI
);
2074 // printf("%s", "a") --> putchar('a')
2075 if (FormatStr
== "%s" && CI
->getNumArgOperands() > 1) {
2077 if (!getConstantStringInfo(CI
->getOperand(1), ChrStr
))
2079 if (ChrStr
.size() != 1)
2081 return emitPutChar(B
.getInt32(ChrStr
[0]), B
, TLI
);
2084 // printf("foo\n") --> puts("foo")
2085 if (FormatStr
[FormatStr
.size() - 1] == '\n' &&
2086 FormatStr
.find('%') == StringRef::npos
) { // No format characters.
2087 // Create a string literal with no \n on it. We expect the constant merge
2088 // pass to be run after this pass, to merge duplicate strings.
2089 FormatStr
= FormatStr
.drop_back();
2090 Value
*GV
= B
.CreateGlobalString(FormatStr
, "str");
2091 return emitPutS(GV
, B
, TLI
);
2094 // Optimize specific format strings.
2095 // printf("%c", chr) --> putchar(chr)
2096 if (FormatStr
== "%c" && CI
->getNumArgOperands() > 1 &&
2097 CI
->getArgOperand(1)->getType()->isIntegerTy())
2098 return emitPutChar(CI
->getArgOperand(1), B
, TLI
);
2100 // printf("%s\n", str) --> puts(str)
2101 if (FormatStr
== "%s\n" && CI
->getNumArgOperands() > 1 &&
2102 CI
->getArgOperand(1)->getType()->isPointerTy())
2103 return emitPutS(CI
->getArgOperand(1), B
, TLI
);
2107 Value
*LibCallSimplifier::optimizePrintF(CallInst
*CI
, IRBuilder
<> &B
) {
2109 Function
*Callee
= CI
->getCalledFunction();
2110 FunctionType
*FT
= Callee
->getFunctionType();
2111 if (Value
*V
= optimizePrintFString(CI
, B
)) {
2115 // printf(format, ...) -> iprintf(format, ...) if no floating point
2117 if (TLI
->has(LibFunc_iprintf
) && !callHasFloatingPointArgument(CI
)) {
2118 Module
*M
= B
.GetInsertBlock()->getParent()->getParent();
2119 FunctionCallee IPrintFFn
=
2120 M
->getOrInsertFunction("iprintf", FT
, Callee
->getAttributes());
2121 CallInst
*New
= cast
<CallInst
>(CI
->clone());
2122 New
->setCalledFunction(IPrintFFn
);
2127 // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
2129 if (TLI
->has(LibFunc_small_printf
) && !callHasFP128Argument(CI
)) {
2130 Module
*M
= B
.GetInsertBlock()->getParent()->getParent();
2131 auto SmallPrintFFn
=
2132 M
->getOrInsertFunction(TLI
->getName(LibFunc_small_printf
),
2133 FT
, Callee
->getAttributes());
2134 CallInst
*New
= cast
<CallInst
>(CI
->clone());
2135 New
->setCalledFunction(SmallPrintFFn
);
2143 Value
*LibCallSimplifier::optimizeSPrintFString(CallInst
*CI
, IRBuilder
<> &B
) {
2144 // Check for a fixed format string.
2145 StringRef FormatStr
;
2146 if (!getConstantStringInfo(CI
->getArgOperand(1), FormatStr
))
2149 // If we just have a format string (nothing else crazy) transform it.
2150 if (CI
->getNumArgOperands() == 2) {
2151 // Make sure there's no % in the constant array. We could try to handle
2152 // %% -> % in the future if we cared.
2153 if (FormatStr
.find('%') != StringRef::npos
)
2154 return nullptr; // we found a format specifier, bail out.
2156 // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
2157 B
.CreateMemCpy(CI
->getArgOperand(0), 1, CI
->getArgOperand(1), 1,
2158 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()),
2159 FormatStr
.size() + 1)); // Copy the null byte.
2160 return ConstantInt::get(CI
->getType(), FormatStr
.size());
2163 // The remaining optimizations require the format string to be "%s" or "%c"
2164 // and have an extra operand.
2165 if (FormatStr
.size() != 2 || FormatStr
[0] != '%' ||
2166 CI
->getNumArgOperands() < 3)
2169 // Decode the second character of the format string.
2170 if (FormatStr
[1] == 'c') {
2171 // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
2172 if (!CI
->getArgOperand(2)->getType()->isIntegerTy())
2174 Value
*V
= B
.CreateTrunc(CI
->getArgOperand(2), B
.getInt8Ty(), "char");
2175 Value
*Ptr
= castToCStr(CI
->getArgOperand(0), B
);
2176 B
.CreateStore(V
, Ptr
);
2177 Ptr
= B
.CreateGEP(B
.getInt8Ty(), Ptr
, B
.getInt32(1), "nul");
2178 B
.CreateStore(B
.getInt8(0), Ptr
);
2180 return ConstantInt::get(CI
->getType(), 1);
2183 if (FormatStr
[1] == 's') {
2184 // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
2186 if (!CI
->getArgOperand(2)->getType()->isPointerTy())
2189 Value
*Len
= emitStrLen(CI
->getArgOperand(2), B
, DL
, TLI
);
2193 B
.CreateAdd(Len
, ConstantInt::get(Len
->getType(), 1), "leninc");
2194 B
.CreateMemCpy(CI
->getArgOperand(0), 1, CI
->getArgOperand(2), 1, IncLen
);
2196 // The sprintf result is the unincremented number of bytes in the string.
2197 return B
.CreateIntCast(Len
, CI
->getType(), false);
2202 Value
*LibCallSimplifier::optimizeSPrintF(CallInst
*CI
, IRBuilder
<> &B
) {
2203 Function
*Callee
= CI
->getCalledFunction();
2204 FunctionType
*FT
= Callee
->getFunctionType();
2205 if (Value
*V
= optimizeSPrintFString(CI
, B
)) {
2209 // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
2211 if (TLI
->has(LibFunc_siprintf
) && !callHasFloatingPointArgument(CI
)) {
2212 Module
*M
= B
.GetInsertBlock()->getParent()->getParent();
2213 FunctionCallee SIPrintFFn
=
2214 M
->getOrInsertFunction("siprintf", FT
, Callee
->getAttributes());
2215 CallInst
*New
= cast
<CallInst
>(CI
->clone());
2216 New
->setCalledFunction(SIPrintFFn
);
2221 // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
2222 // floating point arguments.
2223 if (TLI
->has(LibFunc_small_sprintf
) && !callHasFP128Argument(CI
)) {
2224 Module
*M
= B
.GetInsertBlock()->getParent()->getParent();
2225 auto SmallSPrintFFn
=
2226 M
->getOrInsertFunction(TLI
->getName(LibFunc_small_sprintf
),
2227 FT
, Callee
->getAttributes());
2228 CallInst
*New
= cast
<CallInst
>(CI
->clone());
2229 New
->setCalledFunction(SmallSPrintFFn
);
2237 Value
*LibCallSimplifier::optimizeSnPrintFString(CallInst
*CI
, IRBuilder
<> &B
) {
2238 // Check for a fixed format string.
2239 StringRef FormatStr
;
2240 if (!getConstantStringInfo(CI
->getArgOperand(2), FormatStr
))
2244 ConstantInt
*Size
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(1));
2248 uint64_t N
= Size
->getZExtValue();
2250 // If we just have a format string (nothing else crazy) transform it.
2251 if (CI
->getNumArgOperands() == 3) {
2252 // Make sure there's no % in the constant array. We could try to handle
2253 // %% -> % in the future if we cared.
2254 if (FormatStr
.find('%') != StringRef::npos
)
2255 return nullptr; // we found a format specifier, bail out.
2258 return ConstantInt::get(CI
->getType(), FormatStr
.size());
2259 else if (N
< FormatStr
.size() + 1)
2262 // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
2265 CI
->getArgOperand(0), 1, CI
->getArgOperand(2), 1,
2266 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()),
2267 FormatStr
.size() + 1)); // Copy the null byte.
2268 return ConstantInt::get(CI
->getType(), FormatStr
.size());
2271 // The remaining optimizations require the format string to be "%s" or "%c"
2272 // and have an extra operand.
2273 if (FormatStr
.size() == 2 && FormatStr
[0] == '%' &&
2274 CI
->getNumArgOperands() == 4) {
2276 // Decode the second character of the format string.
2277 if (FormatStr
[1] == 'c') {
2279 return ConstantInt::get(CI
->getType(), 1);
2283 // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
2284 if (!CI
->getArgOperand(3)->getType()->isIntegerTy())
2286 Value
*V
= B
.CreateTrunc(CI
->getArgOperand(3), B
.getInt8Ty(), "char");
2287 Value
*Ptr
= castToCStr(CI
->getArgOperand(0), B
);
2288 B
.CreateStore(V
, Ptr
);
2289 Ptr
= B
.CreateGEP(B
.getInt8Ty(), Ptr
, B
.getInt32(1), "nul");
2290 B
.CreateStore(B
.getInt8(0), Ptr
);
2292 return ConstantInt::get(CI
->getType(), 1);
2295 if (FormatStr
[1] == 's') {
2296 // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1)
2298 if (!getConstantStringInfo(CI
->getArgOperand(3), Str
))
2302 return ConstantInt::get(CI
->getType(), Str
.size());
2303 else if (N
< Str
.size() + 1)
2306 B
.CreateMemCpy(CI
->getArgOperand(0), 1, CI
->getArgOperand(3), 1,
2307 ConstantInt::get(CI
->getType(), Str
.size() + 1));
2309 // The snprintf result is the unincremented number of bytes in the string.
2310 return ConstantInt::get(CI
->getType(), Str
.size());
2316 Value
*LibCallSimplifier::optimizeSnPrintF(CallInst
*CI
, IRBuilder
<> &B
) {
2317 if (Value
*V
= optimizeSnPrintFString(CI
, B
)) {
2324 Value
*LibCallSimplifier::optimizeFPrintFString(CallInst
*CI
, IRBuilder
<> &B
) {
2325 optimizeErrorReporting(CI
, B
, 0);
2327 // All the optimizations depend on the format string.
2328 StringRef FormatStr
;
2329 if (!getConstantStringInfo(CI
->getArgOperand(1), FormatStr
))
2332 // Do not do any of the following transformations if the fprintf return
2333 // value is used, in general the fprintf return value is not compatible
2334 // with fwrite(), fputc() or fputs().
2335 if (!CI
->use_empty())
2338 // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
2339 if (CI
->getNumArgOperands() == 2) {
2340 // Could handle %% -> % if we cared.
2341 if (FormatStr
.find('%') != StringRef::npos
)
2342 return nullptr; // We found a format specifier.
2345 CI
->getArgOperand(1),
2346 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()), FormatStr
.size()),
2347 CI
->getArgOperand(0), B
, DL
, TLI
);
2350 // The remaining optimizations require the format string to be "%s" or "%c"
2351 // and have an extra operand.
2352 if (FormatStr
.size() != 2 || FormatStr
[0] != '%' ||
2353 CI
->getNumArgOperands() < 3)
2356 // Decode the second character of the format string.
2357 if (FormatStr
[1] == 'c') {
2358 // fprintf(F, "%c", chr) --> fputc(chr, F)
2359 if (!CI
->getArgOperand(2)->getType()->isIntegerTy())
2361 return emitFPutC(CI
->getArgOperand(2), CI
->getArgOperand(0), B
, TLI
);
2364 if (FormatStr
[1] == 's') {
2365 // fprintf(F, "%s", str) --> fputs(str, F)
2366 if (!CI
->getArgOperand(2)->getType()->isPointerTy())
2368 return emitFPutS(CI
->getArgOperand(2), CI
->getArgOperand(0), B
, TLI
);
2373 Value
*LibCallSimplifier::optimizeFPrintF(CallInst
*CI
, IRBuilder
<> &B
) {
2374 Function
*Callee
= CI
->getCalledFunction();
2375 FunctionType
*FT
= Callee
->getFunctionType();
2376 if (Value
*V
= optimizeFPrintFString(CI
, B
)) {
2380 // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
2381 // floating point arguments.
2382 if (TLI
->has(LibFunc_fiprintf
) && !callHasFloatingPointArgument(CI
)) {
2383 Module
*M
= B
.GetInsertBlock()->getParent()->getParent();
2384 FunctionCallee FIPrintFFn
=
2385 M
->getOrInsertFunction("fiprintf", FT
, Callee
->getAttributes());
2386 CallInst
*New
= cast
<CallInst
>(CI
->clone());
2387 New
->setCalledFunction(FIPrintFFn
);
2392 // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
2393 // 128-bit floating point arguments.
2394 if (TLI
->has(LibFunc_small_fprintf
) && !callHasFP128Argument(CI
)) {
2395 Module
*M
= B
.GetInsertBlock()->getParent()->getParent();
2396 auto SmallFPrintFFn
=
2397 M
->getOrInsertFunction(TLI
->getName(LibFunc_small_fprintf
),
2398 FT
, Callee
->getAttributes());
2399 CallInst
*New
= cast
<CallInst
>(CI
->clone());
2400 New
->setCalledFunction(SmallFPrintFFn
);
2408 Value
*LibCallSimplifier::optimizeFWrite(CallInst
*CI
, IRBuilder
<> &B
) {
2409 optimizeErrorReporting(CI
, B
, 3);
2411 // Get the element size and count.
2412 ConstantInt
*SizeC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(1));
2413 ConstantInt
*CountC
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(2));
2414 if (SizeC
&& CountC
) {
2415 uint64_t Bytes
= SizeC
->getZExtValue() * CountC
->getZExtValue();
2417 // If this is writing zero records, remove the call (it's a noop).
2419 return ConstantInt::get(CI
->getType(), 0);
2421 // If this is writing one byte, turn it into fputc.
2422 // This optimisation is only valid, if the return value is unused.
2423 if (Bytes
== 1 && CI
->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
2424 Value
*Char
= B
.CreateLoad(B
.getInt8Ty(),
2425 castToCStr(CI
->getArgOperand(0), B
), "char");
2426 Value
*NewCI
= emitFPutC(Char
, CI
->getArgOperand(3), B
, TLI
);
2427 return NewCI
? ConstantInt::get(CI
->getType(), 1) : nullptr;
2431 if (isLocallyOpenedFile(CI
->getArgOperand(3), CI
, B
, TLI
))
2432 return emitFWriteUnlocked(CI
->getArgOperand(0), CI
->getArgOperand(1),
2433 CI
->getArgOperand(2), CI
->getArgOperand(3), B
, DL
,
2439 Value
*LibCallSimplifier::optimizeFPuts(CallInst
*CI
, IRBuilder
<> &B
) {
2440 optimizeErrorReporting(CI
, B
, 1);
2442 // Don't rewrite fputs to fwrite when optimising for size because fwrite
2443 // requires more arguments and thus extra MOVs are required.
2444 bool OptForSize
= CI
->getFunction()->hasOptSize() ||
2445 llvm::shouldOptimizeForSize(CI
->getParent(), PSI
, BFI
);
2449 // Check if has any use
2450 if (!CI
->use_empty()) {
2451 if (isLocallyOpenedFile(CI
->getArgOperand(1), CI
, B
, TLI
))
2452 return emitFPutSUnlocked(CI
->getArgOperand(0), CI
->getArgOperand(1), B
,
2455 // We can't optimize if return value is used.
2459 // fputs(s,F) --> fwrite(s,strlen(s),1,F)
2460 uint64_t Len
= GetStringLength(CI
->getArgOperand(0));
2464 // Known to have no uses (see above).
2466 CI
->getArgOperand(0),
2467 ConstantInt::get(DL
.getIntPtrType(CI
->getContext()), Len
- 1),
2468 CI
->getArgOperand(1), B
, DL
, TLI
);
2471 Value
*LibCallSimplifier::optimizeFPutc(CallInst
*CI
, IRBuilder
<> &B
) {
2472 optimizeErrorReporting(CI
, B
, 1);
2474 if (isLocallyOpenedFile(CI
->getArgOperand(1), CI
, B
, TLI
))
2475 return emitFPutCUnlocked(CI
->getArgOperand(0), CI
->getArgOperand(1), B
,
2481 Value
*LibCallSimplifier::optimizeFGetc(CallInst
*CI
, IRBuilder
<> &B
) {
2482 if (isLocallyOpenedFile(CI
->getArgOperand(0), CI
, B
, TLI
))
2483 return emitFGetCUnlocked(CI
->getArgOperand(0), B
, TLI
);
2488 Value
*LibCallSimplifier::optimizeFGets(CallInst
*CI
, IRBuilder
<> &B
) {
2489 if (isLocallyOpenedFile(CI
->getArgOperand(2), CI
, B
, TLI
))
2490 return emitFGetSUnlocked(CI
->getArgOperand(0), CI
->getArgOperand(1),
2491 CI
->getArgOperand(2), B
, TLI
);
2496 Value
*LibCallSimplifier::optimizeFRead(CallInst
*CI
, IRBuilder
<> &B
) {
2497 if (isLocallyOpenedFile(CI
->getArgOperand(3), CI
, B
, TLI
))
2498 return emitFReadUnlocked(CI
->getArgOperand(0), CI
->getArgOperand(1),
2499 CI
->getArgOperand(2), CI
->getArgOperand(3), B
, DL
,
2505 Value
*LibCallSimplifier::optimizePuts(CallInst
*CI
, IRBuilder
<> &B
) {
2506 if (!CI
->use_empty())
2509 // Check for a constant string.
2510 // puts("") -> putchar('\n')
2512 if (getConstantStringInfo(CI
->getArgOperand(0), Str
) && Str
.empty())
2513 return emitPutChar(B
.getInt32('\n'), B
, TLI
);
2518 bool LibCallSimplifier::hasFloatVersion(StringRef FuncName
) {
2520 SmallString
<20> FloatFuncName
= FuncName
;
2521 FloatFuncName
+= 'f';
2522 if (TLI
->getLibFunc(FloatFuncName
, Func
))
2523 return TLI
->has(Func
);
2527 Value
*LibCallSimplifier::optimizeStringMemoryLibCall(CallInst
*CI
,
2528 IRBuilder
<> &Builder
) {
2530 Function
*Callee
= CI
->getCalledFunction();
2531 // Check for string/memory library functions.
2532 if (TLI
->getLibFunc(*Callee
, Func
) && TLI
->has(Func
)) {
2533 // Make sure we never change the calling convention.
2534 assert((ignoreCallingConv(Func
) ||
2535 isCallingConvCCompatible(CI
)) &&
2536 "Optimizing string/memory libcall would change the calling convention");
2538 case LibFunc_strcat
:
2539 return optimizeStrCat(CI
, Builder
);
2540 case LibFunc_strncat
:
2541 return optimizeStrNCat(CI
, Builder
);
2542 case LibFunc_strchr
:
2543 return optimizeStrChr(CI
, Builder
);
2544 case LibFunc_strrchr
:
2545 return optimizeStrRChr(CI
, Builder
);
2546 case LibFunc_strcmp
:
2547 return optimizeStrCmp(CI
, Builder
);
2548 case LibFunc_strncmp
:
2549 return optimizeStrNCmp(CI
, Builder
);
2550 case LibFunc_strcpy
:
2551 return optimizeStrCpy(CI
, Builder
);
2552 case LibFunc_stpcpy
:
2553 return optimizeStpCpy(CI
, Builder
);
2554 case LibFunc_strncpy
:
2555 return optimizeStrNCpy(CI
, Builder
);
2556 case LibFunc_strlen
:
2557 return optimizeStrLen(CI
, Builder
);
2558 case LibFunc_strpbrk
:
2559 return optimizeStrPBrk(CI
, Builder
);
2560 case LibFunc_strtol
:
2561 case LibFunc_strtod
:
2562 case LibFunc_strtof
:
2563 case LibFunc_strtoul
:
2564 case LibFunc_strtoll
:
2565 case LibFunc_strtold
:
2566 case LibFunc_strtoull
:
2567 return optimizeStrTo(CI
, Builder
);
2568 case LibFunc_strspn
:
2569 return optimizeStrSpn(CI
, Builder
);
2570 case LibFunc_strcspn
:
2571 return optimizeStrCSpn(CI
, Builder
);
2572 case LibFunc_strstr
:
2573 return optimizeStrStr(CI
, Builder
);
2574 case LibFunc_memchr
:
2575 return optimizeMemChr(CI
, Builder
);
2577 return optimizeBCmp(CI
, Builder
);
2578 case LibFunc_memcmp
:
2579 return optimizeMemCmp(CI
, Builder
);
2580 case LibFunc_memcpy
:
2581 return optimizeMemCpy(CI
, Builder
);
2582 case LibFunc_memmove
:
2583 return optimizeMemMove(CI
, Builder
);
2584 case LibFunc_memset
:
2585 return optimizeMemSet(CI
, Builder
);
2586 case LibFunc_realloc
:
2587 return optimizeRealloc(CI
, Builder
);
2588 case LibFunc_wcslen
:
2589 return optimizeWcslen(CI
, Builder
);
2597 Value
*LibCallSimplifier::optimizeFloatingPointLibCall(CallInst
*CI
,
2599 IRBuilder
<> &Builder
) {
2600 // Don't optimize calls that require strict floating point semantics.
2601 if (CI
->isStrictFP())
2604 if (Value
*V
= optimizeTrigReflections(CI
, Func
, Builder
))
2608 case LibFunc_sinpif
:
2610 case LibFunc_cospif
:
2612 return optimizeSinCosPi(CI
, Builder
);
2616 return optimizePow(CI
, Builder
);
2620 return optimizeExp2(CI
, Builder
);
2624 return replaceUnaryCall(CI
, Builder
, Intrinsic::fabs
);
2628 return optimizeSqrt(CI
, Builder
);
2634 return optimizeLog(CI
, Builder
);
2638 return optimizeTan(CI
, Builder
);
2640 return replaceUnaryCall(CI
, Builder
, Intrinsic::ceil
);
2642 return replaceUnaryCall(CI
, Builder
, Intrinsic::floor
);
2644 return replaceUnaryCall(CI
, Builder
, Intrinsic::round
);
2645 case LibFunc_nearbyint
:
2646 return replaceUnaryCall(CI
, Builder
, Intrinsic::nearbyint
);
2648 return replaceUnaryCall(CI
, Builder
, Intrinsic::rint
);
2650 return replaceUnaryCall(CI
, Builder
, Intrinsic::trunc
);
2666 if (UnsafeFPShrink
&& hasFloatVersion(CI
->getCalledFunction()->getName()))
2667 return optimizeUnaryDoubleFP(CI
, Builder
, true);
2669 case LibFunc_copysign
:
2670 if (hasFloatVersion(CI
->getCalledFunction()->getName()))
2671 return optimizeBinaryDoubleFP(CI
, Builder
);
2679 return optimizeFMinFMax(CI
, Builder
);
2683 return optimizeCAbs(CI
, Builder
);
2689 Value
*LibCallSimplifier::optimizeCall(CallInst
*CI
) {
2690 // TODO: Split out the code below that operates on FP calls so that
2691 // we can all non-FP calls with the StrictFP attribute to be
2693 if (CI
->isNoBuiltin())
2697 Function
*Callee
= CI
->getCalledFunction();
2699 SmallVector
<OperandBundleDef
, 2> OpBundles
;
2700 CI
->getOperandBundlesAsDefs(OpBundles
);
2701 IRBuilder
<> Builder(CI
, /*FPMathTag=*/nullptr, OpBundles
);
2702 bool isCallingConvC
= isCallingConvCCompatible(CI
);
2704 // Command-line parameter overrides instruction attribute.
2705 // This can't be moved to optimizeFloatingPointLibCall() because it may be
2706 // used by the intrinsic optimizations.
2707 if (EnableUnsafeFPShrink
.getNumOccurrences() > 0)
2708 UnsafeFPShrink
= EnableUnsafeFPShrink
;
2709 else if (isa
<FPMathOperator
>(CI
) && CI
->isFast())
2710 UnsafeFPShrink
= true;
2712 // First, check for intrinsics.
2713 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(CI
)) {
2714 if (!isCallingConvC
)
2716 // The FP intrinsics have corresponding constrained versions so we don't
2717 // need to check for the StrictFP attribute here.
2718 switch (II
->getIntrinsicID()) {
2719 case Intrinsic::pow
:
2720 return optimizePow(CI
, Builder
);
2721 case Intrinsic::exp2
:
2722 return optimizeExp2(CI
, Builder
);
2723 case Intrinsic::log
:
2724 return optimizeLog(CI
, Builder
);
2725 case Intrinsic::sqrt
:
2726 return optimizeSqrt(CI
, Builder
);
2727 // TODO: Use foldMallocMemset() with memset intrinsic.
2733 // Also try to simplify calls to fortified library functions.
2734 if (Value
*SimplifiedFortifiedCI
= FortifiedSimplifier
.optimizeCall(CI
)) {
2735 // Try to further simplify the result.
2736 CallInst
*SimplifiedCI
= dyn_cast
<CallInst
>(SimplifiedFortifiedCI
);
2737 if (SimplifiedCI
&& SimplifiedCI
->getCalledFunction()) {
2738 // Use an IR Builder from SimplifiedCI if available instead of CI
2739 // to guarantee we reach all uses we might replace later on.
2740 IRBuilder
<> TmpBuilder(SimplifiedCI
);
2741 if (Value
*V
= optimizeStringMemoryLibCall(SimplifiedCI
, TmpBuilder
)) {
2742 // If we were able to further simplify, remove the now redundant call.
2743 SimplifiedCI
->replaceAllUsesWith(V
);
2744 eraseFromParent(SimplifiedCI
);
2748 return SimplifiedFortifiedCI
;
2751 // Then check for known library functions.
2752 if (TLI
->getLibFunc(*Callee
, Func
) && TLI
->has(Func
)) {
2753 // We never change the calling convention.
2754 if (!ignoreCallingConv(Func
) && !isCallingConvC
)
2756 if (Value
*V
= optimizeStringMemoryLibCall(CI
, Builder
))
2758 if (Value
*V
= optimizeFloatingPointLibCall(CI
, Func
, Builder
))
2764 return optimizeFFS(CI
, Builder
);
2768 return optimizeFls(CI
, Builder
);
2772 return optimizeAbs(CI
, Builder
);
2773 case LibFunc_isdigit
:
2774 return optimizeIsDigit(CI
, Builder
);
2775 case LibFunc_isascii
:
2776 return optimizeIsAscii(CI
, Builder
);
2777 case LibFunc_toascii
:
2778 return optimizeToAscii(CI
, Builder
);
2782 return optimizeAtoi(CI
, Builder
);
2783 case LibFunc_strtol
:
2784 case LibFunc_strtoll
:
2785 return optimizeStrtol(CI
, Builder
);
2786 case LibFunc_printf
:
2787 return optimizePrintF(CI
, Builder
);
2788 case LibFunc_sprintf
:
2789 return optimizeSPrintF(CI
, Builder
);
2790 case LibFunc_snprintf
:
2791 return optimizeSnPrintF(CI
, Builder
);
2792 case LibFunc_fprintf
:
2793 return optimizeFPrintF(CI
, Builder
);
2794 case LibFunc_fwrite
:
2795 return optimizeFWrite(CI
, Builder
);
2797 return optimizeFRead(CI
, Builder
);
2799 return optimizeFPuts(CI
, Builder
);
2801 return optimizeFGets(CI
, Builder
);
2803 return optimizeFPutc(CI
, Builder
);
2805 return optimizeFGetc(CI
, Builder
);
2807 return optimizePuts(CI
, Builder
);
2808 case LibFunc_perror
:
2809 return optimizeErrorReporting(CI
, Builder
);
2810 case LibFunc_vfprintf
:
2811 case LibFunc_fiprintf
:
2812 return optimizeErrorReporting(CI
, Builder
, 0);
2820 LibCallSimplifier::LibCallSimplifier(
2821 const DataLayout
&DL
, const TargetLibraryInfo
*TLI
,
2822 OptimizationRemarkEmitter
&ORE
,
2823 BlockFrequencyInfo
*BFI
, ProfileSummaryInfo
*PSI
,
2824 function_ref
<void(Instruction
*, Value
*)> Replacer
,
2825 function_ref
<void(Instruction
*)> Eraser
)
2826 : FortifiedSimplifier(TLI
), DL(DL
), TLI(TLI
), ORE(ORE
), BFI(BFI
), PSI(PSI
),
2827 UnsafeFPShrink(false), Replacer(Replacer
), Eraser(Eraser
) {}
2829 void LibCallSimplifier::replaceAllUsesWith(Instruction
*I
, Value
*With
) {
2830 // Indirect through the replacer used in this instance.
2834 void LibCallSimplifier::eraseFromParent(Instruction
*I
) {
// TODO:
//   Additional cases that we need to add to this file:
//
// cbrt:
//   * cbrt(expN(X))  -> expN(x/3)
//   * cbrt(sqrt(x))  -> pow(x,1/6)
//   * cbrt(cbrt(x))  -> pow(x,1/9)
//
// exp, expf, expl:
//   * exp(log(x))  -> x
//
// log, logf, logl:
//   * log(exp(x))   -> x
//   * log(exp(y))   -> y*log(e)
//   * log(exp10(y)) -> y*log(10)
//   * log(sqrt(x))  -> 0.5*log(x)
//
// pow, powf, powl:
//   * pow(sqrt(x),y)   -> pow(x,y*0.5)
//   * pow(pow(x,y),z)-> pow(x,y*z)
//
// signbit:
//   * signbit(cnst) -> cnst'
//   * signbit(nncst) -> 0 (if nncst is a non-negative constant)
//
// sqrt, sqrtf, sqrtl:
//   * sqrt(expN(x))  -> expN(x*0.5)
//   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
//   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
2869 //===----------------------------------------------------------------------===//
2870 // Fortified Library Call Optimizations
2871 //===----------------------------------------------------------------------===//
2874 FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst
*CI
,
2876 Optional
<unsigned> SizeOp
,
2877 Optional
<unsigned> StrOp
,
2878 Optional
<unsigned> FlagOp
) {
2879 // If this function takes a flag argument, the implementation may use it to
2880 // perform extra checks. Don't fold into the non-checking variant.
2882 ConstantInt
*Flag
= dyn_cast
<ConstantInt
>(CI
->getArgOperand(*FlagOp
));
2883 if (!Flag
|| !Flag
->isZero())
2887 if (SizeOp
&& CI
->getArgOperand(ObjSizeOp
) == CI
->getArgOperand(*SizeOp
))
2890 if (ConstantInt
*ObjSizeCI
=
2891 dyn_cast
<ConstantInt
>(CI
->getArgOperand(ObjSizeOp
))) {
2892 if (ObjSizeCI
->isMinusOne())
2894 // If the object size wasn't -1 (unknown), bail out if we were asked to.
2895 if (OnlyLowerUnknownSize
)
2898 uint64_t Len
= GetStringLength(CI
->getArgOperand(*StrOp
));
2899 // If the length is 0 we don't know how long it is and so we can't
2900 // remove the check.
2903 return ObjSizeCI
->getZExtValue() >= Len
;
2907 if (ConstantInt
*SizeCI
=
2908 dyn_cast
<ConstantInt
>(CI
->getArgOperand(*SizeOp
)))
2909 return ObjSizeCI
->getZExtValue() >= SizeCI
->getZExtValue();
2915 Value
*FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst
*CI
,
2917 if (isFortifiedCallFoldable(CI
, 3, 2)) {
2918 B
.CreateMemCpy(CI
->getArgOperand(0), 1, CI
->getArgOperand(1), 1,
2919 CI
->getArgOperand(2));
2920 return CI
->getArgOperand(0);
2925 Value
*FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst
*CI
,
2927 if (isFortifiedCallFoldable(CI
, 3, 2)) {
2928 B
.CreateMemMove(CI
->getArgOperand(0), 1, CI
->getArgOperand(1), 1,
2929 CI
->getArgOperand(2));
2930 return CI
->getArgOperand(0);
2935 Value
*FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst
*CI
,
2937 // TODO: Try foldMallocMemset() here.
2939 if (isFortifiedCallFoldable(CI
, 3, 2)) {
2940 Value
*Val
= B
.CreateIntCast(CI
->getArgOperand(1), B
.getInt8Ty(), false);
2941 B
.CreateMemSet(CI
->getArgOperand(0), Val
, CI
->getArgOperand(2), 1);
2942 return CI
->getArgOperand(0);
2947 Value
*FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst
*CI
,
2950 const DataLayout
&DL
= CI
->getModule()->getDataLayout();
2951 Value
*Dst
= CI
->getArgOperand(0), *Src
= CI
->getArgOperand(1),
2952 *ObjSize
= CI
->getArgOperand(2);
2954 // __stpcpy_chk(x,x,...) -> x+strlen(x)
2955 if (Func
== LibFunc_stpcpy_chk
&& !OnlyLowerUnknownSize
&& Dst
== Src
) {
2956 Value
*StrLen
= emitStrLen(Src
, B
, DL
, TLI
);
2957 return StrLen
? B
.CreateInBoundsGEP(B
.getInt8Ty(), Dst
, StrLen
) : nullptr;
2960 // If a) we don't have any length information, or b) we know this will
2961 // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
2962 // st[rp]cpy_chk call which may fail at runtime if the size is too long.
2963 // TODO: It might be nice to get a maximum length out of the possible
2964 // string lengths for varying.
2965 if (isFortifiedCallFoldable(CI
, 2, None
, 1)) {
2966 if (Func
== LibFunc_strcpy_chk
)
2967 return emitStrCpy(Dst
, Src
, B
, TLI
);
2969 return emitStpCpy(Dst
, Src
, B
, TLI
);
2972 if (OnlyLowerUnknownSize
)
2975 // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
2976 uint64_t Len
= GetStringLength(Src
);
2980 Type
*SizeTTy
= DL
.getIntPtrType(CI
->getContext());
2981 Value
*LenV
= ConstantInt::get(SizeTTy
, Len
);
2982 Value
*Ret
= emitMemCpyChk(Dst
, Src
, LenV
, ObjSize
, B
, DL
, TLI
);
2983 // If the function was an __stpcpy_chk, and we were able to fold it into
2984 // a __memcpy_chk, we still need to return the correct end pointer.
2985 if (Ret
&& Func
== LibFunc_stpcpy_chk
)
2986 return B
.CreateGEP(B
.getInt8Ty(), Dst
, ConstantInt::get(SizeTTy
, Len
- 1));
2990 Value
*FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst
*CI
,
2993 if (isFortifiedCallFoldable(CI
, 3, 2)) {
2994 if (Func
== LibFunc_strncpy_chk
)
2995 return emitStrNCpy(CI
->getArgOperand(0), CI
->getArgOperand(1),
2996 CI
->getArgOperand(2), B
, TLI
);
2998 return emitStpNCpy(CI
->getArgOperand(0), CI
->getArgOperand(1),
2999 CI
->getArgOperand(2), B
, TLI
);
3005 Value
*FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst
*CI
,
3007 if (isFortifiedCallFoldable(CI
, 4, 3))
3008 return emitMemCCpy(CI
->getArgOperand(0), CI
->getArgOperand(1),
3009 CI
->getArgOperand(2), CI
->getArgOperand(3), B
, TLI
);
3014 Value
*FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst
*CI
,
3016 if (isFortifiedCallFoldable(CI
, 3, 1, None
, 2)) {
3017 SmallVector
<Value
*, 8> VariadicArgs(CI
->arg_begin() + 5, CI
->arg_end());
3018 return emitSNPrintf(CI
->getArgOperand(0), CI
->getArgOperand(1),
3019 CI
->getArgOperand(4), VariadicArgs
, B
, TLI
);
3025 Value
*FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst
*CI
,
3027 if (isFortifiedCallFoldable(CI
, 2, None
, None
, 1)) {
3028 SmallVector
<Value
*, 8> VariadicArgs(CI
->arg_begin() + 4, CI
->arg_end());
3029 return emitSPrintf(CI
->getArgOperand(0), CI
->getArgOperand(3), VariadicArgs
,
3036 Value
*FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst
*CI
,
3038 if (isFortifiedCallFoldable(CI
, 2))
3039 return emitStrCat(CI
->getArgOperand(0), CI
->getArgOperand(1), B
, TLI
);
3044 Value
*FortifiedLibCallSimplifier::optimizeStrLCat(CallInst
*CI
,
3046 if (isFortifiedCallFoldable(CI
, 3))
3047 return emitStrLCat(CI
->getArgOperand(0), CI
->getArgOperand(1),
3048 CI
->getArgOperand(2), B
, TLI
);
3053 Value
*FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst
*CI
,
3055 if (isFortifiedCallFoldable(CI
, 3))
3056 return emitStrNCat(CI
->getArgOperand(0), CI
->getArgOperand(1),
3057 CI
->getArgOperand(2), B
, TLI
);
3062 Value
*FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst
*CI
,
3064 if (isFortifiedCallFoldable(CI
, 3))
3065 return emitStrLCpy(CI
->getArgOperand(0), CI
->getArgOperand(1),
3066 CI
->getArgOperand(2), B
, TLI
);
3071 Value
*FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst
*CI
,
3073 if (isFortifiedCallFoldable(CI
, 3, 1, None
, 2))
3074 return emitVSNPrintf(CI
->getArgOperand(0), CI
->getArgOperand(1),
3075 CI
->getArgOperand(4), CI
->getArgOperand(5), B
, TLI
);
3080 Value
*FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst
*CI
,
3082 if (isFortifiedCallFoldable(CI
, 2, None
, None
, 1))
3083 return emitVSPrintf(CI
->getArgOperand(0), CI
->getArgOperand(3),
3084 CI
->getArgOperand(4), B
, TLI
);
3089 Value
*FortifiedLibCallSimplifier::optimizeCall(CallInst
*CI
) {
3090 // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
3091 // Some clang users checked for _chk libcall availability using:
3092 // __has_builtin(__builtin___memcpy_chk)
3093 // When compiling with -fno-builtin, this is always true.
3094 // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
3095 // end up with fortified libcalls, which isn't acceptable in a freestanding
3096 // environment which only provides their non-fortified counterparts.
3098 // Until we change clang and/or teach external users to check for availability
3099 // differently, disregard the "nobuiltin" attribute and TLI::has.
3104 Function
*Callee
= CI
->getCalledFunction();
3106 SmallVector
<OperandBundleDef
, 2> OpBundles
;
3107 CI
->getOperandBundlesAsDefs(OpBundles
);
3108 IRBuilder
<> Builder(CI
, /*FPMathTag=*/nullptr, OpBundles
);
3109 bool isCallingConvC
= isCallingConvCCompatible(CI
);
3111 // First, check that this is a known library functions and that the prototype
3113 if (!TLI
->getLibFunc(*Callee
, Func
))
3116 // We never change the calling convention.
3117 if (!ignoreCallingConv(Func
) && !isCallingConvC
)
3121 case LibFunc_memcpy_chk
:
3122 return optimizeMemCpyChk(CI
, Builder
);
3123 case LibFunc_memmove_chk
:
3124 return optimizeMemMoveChk(CI
, Builder
);
3125 case LibFunc_memset_chk
:
3126 return optimizeMemSetChk(CI
, Builder
);
3127 case LibFunc_stpcpy_chk
:
3128 case LibFunc_strcpy_chk
:
3129 return optimizeStrpCpyChk(CI
, Builder
, Func
);
3130 case LibFunc_stpncpy_chk
:
3131 case LibFunc_strncpy_chk
:
3132 return optimizeStrpNCpyChk(CI
, Builder
, Func
);
3133 case LibFunc_memccpy_chk
:
3134 return optimizeMemCCpyChk(CI
, Builder
);
3135 case LibFunc_snprintf_chk
:
3136 return optimizeSNPrintfChk(CI
, Builder
);
3137 case LibFunc_sprintf_chk
:
3138 return optimizeSPrintfChk(CI
, Builder
);
3139 case LibFunc_strcat_chk
:
3140 return optimizeStrCatChk(CI
, Builder
);
3141 case LibFunc_strlcat_chk
:
3142 return optimizeStrLCat(CI
, Builder
);
3143 case LibFunc_strncat_chk
:
3144 return optimizeStrNCatChk(CI
, Builder
);
3145 case LibFunc_strlcpy_chk
:
3146 return optimizeStrLCpyChk(CI
, Builder
);
3147 case LibFunc_vsnprintf_chk
:
3148 return optimizeVSNPrintfChk(CI
, Builder
);
3149 case LibFunc_vsprintf_chk
:
3150 return optimizeVSPrintfChk(CI
, Builder
);
3157 FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
3158 const TargetLibraryInfo
*TLI
, bool OnlyLowerUnknownSize
)
3159 : TLI(TLI
), OnlyLowerUnknownSize(OnlyLowerUnknownSize
) {}