//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
// ‘sitofp .. to’ instructions with a bitwidth above a threshold into
// auto-generated functions. This is useful for targets like x86_64 that cannot
// lower fp conversions with more than 128 bits.
//
//===----------------------------------------------------------------------===//
17 #include "llvm/CodeGen/ExpandLargeFpConvert.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/CodeGen/Passes.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetPassConfig.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/InstIterator.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/InitializePasses.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Target/TargetMachine.h"
35 static cl::opt
<unsigned>
36 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden
,
37 cl::init(llvm::IntegerType::MAX_INT_BITS
),
38 cl::desc("fp convert instructions on integers with "
39 "more than <N> bits are expanded."));
41 /// Generate code to convert a fp number to integer, replacing FPToS(U)I with
42 /// the generated code. This currently generates code similarly to compiler-rt's
45 /// An example IR generated from compiler-rt/fixsfdi.c looks like below:
46 /// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
48 /// %0 = bitcast float %a to i32
49 /// %conv.i = zext i32 %0 to i64
50 /// %tobool.not = icmp sgt i32 %0, -1
51 /// %conv = select i1 %tobool.not, i64 1, i64 -1
52 /// %and = lshr i64 %conv.i, 23
53 /// %shr = and i64 %and, 255
54 /// %and2 = and i64 %conv.i, 8388607
55 /// %or = or i64 %and2, 8388608
56 /// %cmp = icmp ult i64 %shr, 127
57 /// br i1 %cmp, label %cleanup, label %if.end
59 /// if.end: ; preds = %entry
60 /// %sub = add nuw nsw i64 %shr, 4294967169
61 /// %conv5 = and i64 %sub, 4294967232
62 /// %cmp6.not = icmp eq i64 %conv5, 0
63 /// br i1 %cmp6.not, label %if.end12, label %if.then8
65 /// if.then8: ; preds = %if.end
66 /// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
69 /// if.end12: ; preds = %if.end
70 /// %cmp13 = icmp ult i64 %shr, 150
71 /// br i1 %cmp13, label %if.then15, label %if.else
73 /// if.then15: ; preds = %if.end12
74 /// %sub16 = sub nuw nsw i64 150, %shr
75 /// %shr17 = lshr i64 %or, %sub16
76 /// %mul = mul nsw i64 %shr17, %conv
79 /// if.else: ; preds = %if.end12
80 /// %sub18 = add nsw i64 %shr, -150
81 /// %shl = shl i64 %or, %sub18
82 /// %mul19 = mul nsw i64 %shl, %conv
85 /// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
86 /// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
90 /// Replace fp to integer with generated code.
91 static void expandFPToI(Instruction
*FPToI
) {
92 IRBuilder
<> Builder(FPToI
);
93 auto *FloatVal
= FPToI
->getOperand(0);
94 IntegerType
*IntTy
= cast
<IntegerType
>(FPToI
->getType());
96 unsigned BitWidth
= FPToI
->getType()->getIntegerBitWidth();
97 unsigned FPMantissaWidth
= FloatVal
->getType()->getFPMantissaWidth() - 1;
99 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
100 // to i32 first following a sext/zext to target integer type.
102 if (FloatVal
->getType()->isHalfTy()) {
103 if (FPToI
->getOpcode() == Instruction::FPToUI
) {
104 Value
*A0
= Builder
.CreateFPToUI(FloatVal
, Builder
.getIntNTy(32));
105 A1
= Builder
.CreateZExt(A0
, IntTy
);
107 Value
*A0
= Builder
.CreateFPToSI(FloatVal
, Builder
.getIntNTy(32));
108 A1
= Builder
.CreateSExt(A0
, IntTy
);
110 FPToI
->replaceAllUsesWith(A1
);
111 FPToI
->dropAllReferences();
112 FPToI
->eraseFromParent();
116 // fp80 conversion is implemented by fpext to fp128 first then do the
118 FPMantissaWidth
= FPMantissaWidth
== 63 ? 112 : FPMantissaWidth
;
119 unsigned FloatWidth
=
120 PowerOf2Ceil(FloatVal
->getType()->getScalarSizeInBits());
121 unsigned ExponentWidth
= FloatWidth
- FPMantissaWidth
- 1;
122 unsigned ExponentBias
= (1 << (ExponentWidth
- 1)) - 1;
123 Value
*ImplicitBit
= Builder
.CreateShl(
124 Builder
.getIntN(BitWidth
, 1), Builder
.getIntN(BitWidth
, FPMantissaWidth
));
125 Value
*SignificandMask
=
126 Builder
.CreateSub(ImplicitBit
, Builder
.getIntN(BitWidth
, 1));
127 Value
*NegOne
= Builder
.CreateSExt(
128 ConstantInt::getSigned(Builder
.getInt32Ty(), -1), IntTy
);
130 Builder
.CreateShl(ConstantInt::getSigned(IntTy
, 1),
131 ConstantInt::getSigned(IntTy
, BitWidth
- 1));
133 BasicBlock
*Entry
= Builder
.GetInsertBlock();
134 Function
*F
= Entry
->getParent();
135 Entry
->setName(Twine(Entry
->getName(), "fp-to-i-entry"));
137 Entry
->splitBasicBlock(Builder
.GetInsertPoint(), "fp-to-i-cleanup");
139 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-end", F
, End
);
140 BasicBlock
*IfThen5
=
141 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-then5", F
, End
);
143 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-end9", F
, End
);
144 BasicBlock
*IfThen12
=
145 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-then12", F
, End
);
147 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-else", F
, End
);
149 Entry
->getTerminator()->eraseFromParent();
152 Builder
.SetInsertPoint(Entry
);
153 Value
*FloatVal0
= FloatVal
;
154 // fp80 conversion is implemented by fpext to fp128 first then do the
156 if (FloatVal
->getType()->isX86_FP80Ty())
158 Builder
.CreateFPExt(FloatVal
, Type::getFP128Ty(Builder
.getContext()));
160 Builder
.CreateBitCast(FloatVal0
, Builder
.getIntNTy(FloatWidth
));
161 Value
*ARep
= Builder
.CreateZExt(ARep0
, FPToI
->getType());
162 Value
*PosOrNeg
= Builder
.CreateICmpSGT(
163 ARep0
, ConstantInt::getSigned(Builder
.getIntNTy(FloatWidth
), -1));
164 Value
*Sign
= Builder
.CreateSelect(PosOrNeg
, ConstantInt::getSigned(IntTy
, 1),
165 ConstantInt::getSigned(IntTy
, -1));
167 Builder
.CreateLShr(ARep
, Builder
.getIntN(BitWidth
, FPMantissaWidth
));
168 Value
*And2
= Builder
.CreateAnd(
169 And
, Builder
.getIntN(BitWidth
, (1 << ExponentWidth
) - 1));
170 Value
*Abs
= Builder
.CreateAnd(ARep
, SignificandMask
);
171 Value
*Or
= Builder
.CreateOr(Abs
, ImplicitBit
);
173 Builder
.CreateICmpULT(And2
, Builder
.getIntN(BitWidth
, ExponentBias
));
174 Builder
.CreateCondBr(Cmp
, End
, IfEnd
);
177 Builder
.SetInsertPoint(IfEnd
);
178 Value
*Add1
= Builder
.CreateAdd(
179 And2
, ConstantInt::getSigned(
180 IntTy
, -static_cast<int64_t>(ExponentBias
+ BitWidth
)));
181 Value
*Cmp3
= Builder
.CreateICmpULT(
182 Add1
, ConstantInt::getSigned(IntTy
, -static_cast<int64_t>(BitWidth
)));
183 Builder
.CreateCondBr(Cmp3
, IfThen5
, IfEnd9
);
186 Builder
.SetInsertPoint(IfThen5
);
187 Value
*PosInf
= Builder
.CreateXor(NegOne
, NegInf
);
188 Value
*Cond8
= Builder
.CreateSelect(PosOrNeg
, PosInf
, NegInf
);
189 Builder
.CreateBr(End
);
192 Builder
.SetInsertPoint(IfEnd9
);
193 Value
*Cmp10
= Builder
.CreateICmpULT(
194 And2
, Builder
.getIntN(BitWidth
, ExponentBias
+ FPMantissaWidth
));
195 Builder
.CreateCondBr(Cmp10
, IfThen12
, IfElse
);
198 Builder
.SetInsertPoint(IfThen12
);
199 Value
*Sub13
= Builder
.CreateSub(
200 Builder
.getIntN(BitWidth
, ExponentBias
+ FPMantissaWidth
), And2
);
201 Value
*Shr14
= Builder
.CreateLShr(Or
, Sub13
);
202 Value
*Mul
= Builder
.CreateMul(Shr14
, Sign
);
203 Builder
.CreateBr(End
);
206 Builder
.SetInsertPoint(IfElse
);
207 Value
*Sub15
= Builder
.CreateAdd(
208 And2
, ConstantInt::getSigned(
209 IntTy
, -static_cast<int64_t>(ExponentBias
+ FPMantissaWidth
)));
210 Value
*Shl
= Builder
.CreateShl(Or
, Sub15
);
211 Value
*Mul16
= Builder
.CreateMul(Shl
, Sign
);
212 Builder
.CreateBr(End
);
215 Builder
.SetInsertPoint(End
, End
->begin());
216 PHINode
*Retval0
= Builder
.CreatePHI(FPToI
->getType(), 4);
218 Retval0
->addIncoming(Cond8
, IfThen5
);
219 Retval0
->addIncoming(Mul
, IfThen12
);
220 Retval0
->addIncoming(Mul16
, IfElse
);
221 Retval0
->addIncoming(Builder
.getIntN(BitWidth
, 0), Entry
);
223 FPToI
->replaceAllUsesWith(Retval0
);
224 FPToI
->dropAllReferences();
225 FPToI
->eraseFromParent();
228 /// Generate code to convert a fp number to integer, replacing S(U)IToFP with
229 /// the generated code. This currently generates code similarly to compiler-rt's
230 /// implementations. This implementation has an implicit assumption that integer
231 /// width is larger than fp.
233 /// An example IR generated from compiler-rt/floatdisf.c looks like below:
234 /// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
236 /// %cmp = icmp eq i64 %a, 0
237 /// br i1 %cmp, label %return, label %if.end
239 /// if.end: ; preds = %entry
240 /// %shr = ashr i64 %a, 63
241 /// %xor = xor i64 %shr, %a
242 /// %sub = sub nsw i64 %xor, %shr
243 /// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
244 /// %cast = trunc i64 %0 to i32
245 /// %sub1 = sub nuw nsw i32 64, %cast
246 /// %sub2 = xor i32 %cast, 63
247 /// %cmp3 = icmp ult i32 %cast, 40
248 /// br i1 %cmp3, label %if.then4, label %if.else
250 /// if.then4: ; preds = %if.end
251 /// switch i32 %sub1, label %sw.default [
252 /// i32 25, label %sw.bb
253 /// i32 26, label %sw.epilog
256 /// sw.bb: ; preds = %if.then4
257 /// %shl = shl i64 %sub, 1
258 /// br label %sw.epilog
260 /// sw.default: ; preds = %if.then4
261 /// %sub5 = sub nsw i64 38, %0
262 /// %sh_prom = and i64 %sub5, 4294967295
263 /// %shr6 = lshr i64 %sub, %sh_prom
264 /// %shr9 = lshr i64 274877906943, %0
265 /// %and = and i64 %shr9, %sub
266 /// %cmp10 = icmp ne i64 %and, 0
267 /// %conv11 = zext i1 %cmp10 to i64
268 /// %or = or i64 %shr6, %conv11
269 /// br label %sw.epilog
271 /// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
272 /// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
273 /// %1 = lshr i64 %a.addr.0, 2
274 /// %2 = and i64 %1, 1
275 /// %or16 = or i64 %2, %a.addr.0
276 /// %inc = add nsw i64 %or16, 1
277 /// %3 = and i64 %inc, 67108864
278 /// %tobool.not = icmp eq i64 %3, 0
279 /// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
280 /// %spec.select = ashr i64 %inc, %spec.select.v
281 /// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
282 /// br label %if.end26
284 /// if.else: ; preds = %if.end
285 /// %sub23 = add nuw nsw i64 %0, 4294967256
286 /// %sh_prom24 = and i64 %sub23, 4294967295
287 /// %shl25 = shl i64 %sub, %sh_prom24
288 /// br label %if.end26
290 /// if.end26: ; preds = %sw.epilog, %if.else
291 /// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
292 /// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
293 /// %conv27 = trunc i64 %shr to i32
294 /// %and28 = and i32 %conv27, -2147483648
295 /// %add = shl nuw nsw i32 %e.0, 23
296 /// %shl29 = add nuw nsw i32 %add, 1065353216
297 /// %conv31 = trunc i64 %a.addr.1 to i32
298 /// %and32 = and i32 %conv31, 8388607
299 /// %or30 = or i32 %and32, %and28
300 /// %or33 = or i32 %or30, %shl29
301 /// %4 = bitcast i32 %or33 to float
304 /// return: ; preds = %entry, %if.end26
305 /// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
306 /// ret float %retval.0
309 /// Replace integer to fp with generated code.
310 static void expandIToFP(Instruction
*IToFP
) {
311 IRBuilder
<> Builder(IToFP
);
312 auto *IntVal
= IToFP
->getOperand(0);
313 IntegerType
*IntTy
= cast
<IntegerType
>(IntVal
->getType());
315 unsigned BitWidth
= IntVal
->getType()->getIntegerBitWidth();
316 unsigned FPMantissaWidth
= IToFP
->getType()->getFPMantissaWidth() - 1;
317 // fp80 conversion is implemented by conversion tp fp128 first following
318 // a fptrunc to fp80.
319 FPMantissaWidth
= FPMantissaWidth
== 63 ? 112 : FPMantissaWidth
;
320 // FIXME: As there is no related builtins added in compliler-rt,
321 // here currently utilized the fp32 <-> fp16 lib calls to implement.
322 FPMantissaWidth
= FPMantissaWidth
== 10 ? 23 : FPMantissaWidth
;
323 FPMantissaWidth
= FPMantissaWidth
== 7 ? 23 : FPMantissaWidth
;
324 unsigned FloatWidth
= PowerOf2Ceil(FPMantissaWidth
);
325 bool IsSigned
= IToFP
->getOpcode() == Instruction::SIToFP
;
327 assert(BitWidth
> FloatWidth
&& "Unexpected conversion. expandIToFP() "
328 "assumes integer width is larger than fp.");
331 Builder
.CreateShl(Builder
.getIntN(BitWidth
, 1),
332 Builder
.getIntN(BitWidth
, FPMantissaWidth
+ 3));
334 BasicBlock
*Entry
= Builder
.GetInsertBlock();
335 Function
*F
= Entry
->getParent();
336 Entry
->setName(Twine(Entry
->getName(), "itofp-entry"));
338 Entry
->splitBasicBlock(Builder
.GetInsertPoint(), "itofp-return");
340 BasicBlock::Create(Builder
.getContext(), "itofp-if-end", F
, End
);
341 BasicBlock
*IfThen4
=
342 BasicBlock::Create(Builder
.getContext(), "itofp-if-then4", F
, End
);
344 BasicBlock::Create(Builder
.getContext(), "itofp-sw-bb", F
, End
);
345 BasicBlock
*SwDefault
=
346 BasicBlock::Create(Builder
.getContext(), "itofp-sw-default", F
, End
);
347 BasicBlock
*SwEpilog
=
348 BasicBlock::Create(Builder
.getContext(), "itofp-sw-epilog", F
, End
);
349 BasicBlock
*IfThen20
=
350 BasicBlock::Create(Builder
.getContext(), "itofp-if-then20", F
, End
);
352 BasicBlock::Create(Builder
.getContext(), "itofp-if-else", F
, End
);
353 BasicBlock
*IfEnd26
=
354 BasicBlock::Create(Builder
.getContext(), "itofp-if-end26", F
, End
);
356 Entry
->getTerminator()->eraseFromParent();
359 Intrinsic::getDeclaration(F
->getParent(), Intrinsic::ctlz
, IntTy
);
360 ConstantInt
*True
= Builder
.getTrue();
363 Builder
.SetInsertPoint(Entry
);
364 Value
*Cmp
= Builder
.CreateICmpEQ(IntVal
, ConstantInt::getSigned(IntTy
, 0));
365 Builder
.CreateCondBr(Cmp
, End
, IfEnd
);
368 Builder
.SetInsertPoint(IfEnd
);
370 Builder
.CreateAShr(IntVal
, Builder
.getIntN(BitWidth
, BitWidth
- 1));
371 Value
*Xor
= Builder
.CreateXor(Shr
, IntVal
);
372 Value
*Sub
= Builder
.CreateSub(Xor
, Shr
);
373 Value
*Call
= Builder
.CreateCall(CTLZ
, {IsSigned
? Sub
: IntVal
, True
});
374 Value
*Cast
= Builder
.CreateTrunc(Call
, Builder
.getInt32Ty());
375 int BitWidthNew
= FloatWidth
== 128 ? BitWidth
: 32;
376 Value
*Sub1
= Builder
.CreateSub(Builder
.getIntN(BitWidthNew
, BitWidth
),
377 FloatWidth
== 128 ? Call
: Cast
);
378 Value
*Sub2
= Builder
.CreateSub(Builder
.getIntN(BitWidthNew
, BitWidth
- 1),
379 FloatWidth
== 128 ? Call
: Cast
);
380 Value
*Cmp3
= Builder
.CreateICmpSGT(
381 Sub1
, Builder
.getIntN(BitWidthNew
, FPMantissaWidth
+ 1));
382 Builder
.CreateCondBr(Cmp3
, IfThen4
, IfElse
);
385 Builder
.SetInsertPoint(IfThen4
);
386 llvm::SwitchInst
*SI
= Builder
.CreateSwitch(Sub1
, SwDefault
);
387 SI
->addCase(Builder
.getIntN(BitWidthNew
, FPMantissaWidth
+ 2), SwBB
);
388 SI
->addCase(Builder
.getIntN(BitWidthNew
, FPMantissaWidth
+ 3), SwEpilog
);
391 Builder
.SetInsertPoint(SwBB
);
393 Builder
.CreateShl(IsSigned
? Sub
: IntVal
, Builder
.getIntN(BitWidth
, 1));
394 Builder
.CreateBr(SwEpilog
);
397 Builder
.SetInsertPoint(SwDefault
);
398 Value
*Sub5
= Builder
.CreateSub(
399 Builder
.getIntN(BitWidthNew
, BitWidth
- FPMantissaWidth
- 3),
400 FloatWidth
== 128 ? Call
: Cast
);
401 Value
*ShProm
= Builder
.CreateZExt(Sub5
, IntTy
);
402 Value
*Shr6
= Builder
.CreateLShr(IsSigned
? Sub
: IntVal
,
403 FloatWidth
== 128 ? Sub5
: ShProm
);
405 Builder
.CreateAdd(FloatWidth
== 128 ? Call
: Cast
,
406 Builder
.getIntN(BitWidthNew
, FPMantissaWidth
+ 3));
407 Value
*ShProm9
= Builder
.CreateZExt(Sub8
, IntTy
);
408 Value
*Shr9
= Builder
.CreateLShr(ConstantInt::getSigned(IntTy
, -1),
409 FloatWidth
== 128 ? Sub8
: ShProm9
);
410 Value
*And
= Builder
.CreateAnd(Shr9
, IsSigned
? Sub
: IntVal
);
411 Value
*Cmp10
= Builder
.CreateICmpNE(And
, Builder
.getIntN(BitWidth
, 0));
412 Value
*Conv11
= Builder
.CreateZExt(Cmp10
, IntTy
);
413 Value
*Or
= Builder
.CreateOr(Shr6
, Conv11
);
414 Builder
.CreateBr(SwEpilog
);
417 Builder
.SetInsertPoint(SwEpilog
);
418 PHINode
*AAddr0
= Builder
.CreatePHI(IntTy
, 3);
419 AAddr0
->addIncoming(Or
, SwDefault
);
420 AAddr0
->addIncoming(IsSigned
? Sub
: IntVal
, IfThen4
);
421 AAddr0
->addIncoming(Shl
, SwBB
);
422 Value
*A0
= Builder
.CreateTrunc(AAddr0
, Builder
.getInt32Ty());
423 Value
*A1
= Builder
.CreateLShr(A0
, Builder
.getIntN(32, 2));
424 Value
*A2
= Builder
.CreateAnd(A1
, Builder
.getIntN(32, 1));
425 Value
*Conv16
= Builder
.CreateZExt(A2
, IntTy
);
426 Value
*Or17
= Builder
.CreateOr(AAddr0
, Conv16
);
427 Value
*Inc
= Builder
.CreateAdd(Or17
, Builder
.getIntN(BitWidth
, 1));
428 Value
*Shr18
= nullptr;
430 Shr18
= Builder
.CreateAShr(Inc
, Builder
.getIntN(BitWidth
, 2));
432 Shr18
= Builder
.CreateLShr(Inc
, Builder
.getIntN(BitWidth
, 2));
433 Value
*A3
= Builder
.CreateAnd(Inc
, Temp1
, "a3");
434 Value
*PosOrNeg
= Builder
.CreateICmpEQ(A3
, Builder
.getIntN(BitWidth
, 0));
435 Value
*ExtractT60
= Builder
.CreateTrunc(Shr18
, Builder
.getIntNTy(FloatWidth
));
436 Value
*Extract63
= Builder
.CreateLShr(Shr18
, Builder
.getIntN(BitWidth
, 32));
437 Value
*ExtractT64
= nullptr;
439 ExtractT64
= Builder
.CreateTrunc(Sub2
, Builder
.getInt64Ty());
441 ExtractT64
= Builder
.CreateTrunc(Extract63
, Builder
.getInt32Ty());
442 Builder
.CreateCondBr(PosOrNeg
, IfEnd26
, IfThen20
);
445 Builder
.SetInsertPoint(IfThen20
);
446 Value
*Shr21
= nullptr;
448 Shr21
= Builder
.CreateAShr(Inc
, Builder
.getIntN(BitWidth
, 3));
450 Shr21
= Builder
.CreateLShr(Inc
, Builder
.getIntN(BitWidth
, 3));
451 Value
*ExtractT
= Builder
.CreateTrunc(Shr21
, Builder
.getIntNTy(FloatWidth
));
452 Value
*Extract
= Builder
.CreateLShr(Shr21
, Builder
.getIntN(BitWidth
, 32));
453 Value
*ExtractT62
= nullptr;
455 ExtractT62
= Builder
.CreateTrunc(Sub1
, Builder
.getIntNTy(64));
457 ExtractT62
= Builder
.CreateTrunc(Extract
, Builder
.getIntNTy(32));
458 Builder
.CreateBr(IfEnd26
);
461 Builder
.SetInsertPoint(IfElse
);
462 Value
*Sub24
= Builder
.CreateAdd(
463 FloatWidth
== 128 ? Call
: Cast
,
464 ConstantInt::getSigned(Builder
.getIntNTy(BitWidthNew
),
465 -(BitWidth
- FPMantissaWidth
- 1)));
466 Value
*ShProm25
= Builder
.CreateZExt(Sub24
, IntTy
);
467 Value
*Shl26
= Builder
.CreateShl(IsSigned
? Sub
: IntVal
,
468 FloatWidth
== 128 ? Sub24
: ShProm25
);
469 Value
*ExtractT61
= Builder
.CreateTrunc(Shl26
, Builder
.getIntNTy(FloatWidth
));
470 Value
*Extract65
= Builder
.CreateLShr(Shl26
, Builder
.getIntN(BitWidth
, 32));
471 Value
*ExtractT66
= nullptr;
473 ExtractT66
= Builder
.CreateTrunc(Sub2
, Builder
.getIntNTy(64));
475 ExtractT66
= Builder
.CreateTrunc(Extract65
, Builder
.getInt32Ty());
476 Builder
.CreateBr(IfEnd26
);
479 Builder
.SetInsertPoint(IfEnd26
);
480 PHINode
*AAddr1Off0
= Builder
.CreatePHI(Builder
.getIntNTy(FloatWidth
), 3);
481 AAddr1Off0
->addIncoming(ExtractT
, IfThen20
);
482 AAddr1Off0
->addIncoming(ExtractT60
, SwEpilog
);
483 AAddr1Off0
->addIncoming(ExtractT61
, IfElse
);
484 PHINode
*AAddr1Off32
= nullptr;
485 if (FloatWidth
> 32) {
487 Builder
.CreatePHI(Builder
.getIntNTy(FloatWidth
> 80 ? 64 : 32), 3);
488 AAddr1Off32
->addIncoming(ExtractT62
, IfThen20
);
489 AAddr1Off32
->addIncoming(ExtractT64
, SwEpilog
);
490 AAddr1Off32
->addIncoming(ExtractT66
, IfElse
);
492 PHINode
*E0
= nullptr;
493 if (FloatWidth
<= 80) {
494 E0
= Builder
.CreatePHI(Builder
.getIntNTy(BitWidthNew
), 3);
495 E0
->addIncoming(Sub1
, IfThen20
);
496 E0
->addIncoming(Sub2
, SwEpilog
);
497 E0
->addIncoming(Sub2
, IfElse
);
499 Value
*And29
= nullptr;
500 if (FloatWidth
> 80) {
501 Value
*Temp2
= Builder
.CreateShl(Builder
.getIntN(BitWidth
, 1),
502 Builder
.getIntN(BitWidth
, 63));
503 And29
= Builder
.CreateAnd(Shr
, Temp2
, "and29");
505 Value
*Conv28
= Builder
.CreateTrunc(Shr
, Builder
.getIntNTy(32));
506 And29
= Builder
.CreateAnd(
507 Conv28
, ConstantInt::getSigned(Builder
.getIntNTy(32), 0x80000000));
509 unsigned TempMod
= FPMantissaWidth
% 32;
510 Value
*And34
= nullptr;
511 Value
*Shl30
= nullptr;
512 if (FloatWidth
> 80) {
514 Value
*Add
= Builder
.CreateShl(AAddr1Off32
, Builder
.getIntN(64, TempMod
));
515 Shl30
= Builder
.CreateAdd(
517 Builder
.getIntN(64, ((1ull << (62ull - TempMod
)) - 1ull) << TempMod
));
518 And34
= Builder
.CreateZExt(Shl30
, Builder
.getIntNTy(128));
520 Value
*Add
= Builder
.CreateShl(E0
, Builder
.getIntN(32, TempMod
));
521 Shl30
= Builder
.CreateAdd(
522 Add
, Builder
.getIntN(32, ((1 << (30 - TempMod
)) - 1) << TempMod
));
523 And34
= Builder
.CreateAnd(FloatWidth
> 32 ? AAddr1Off32
: AAddr1Off0
,
524 Builder
.getIntN(32, (1 << TempMod
) - 1));
526 Value
*Or35
= nullptr;
527 if (FloatWidth
> 80) {
528 Value
*And29Trunc
= Builder
.CreateTrunc(And29
, Builder
.getIntNTy(128));
529 Value
*Or31
= Builder
.CreateOr(And29Trunc
, And34
);
530 Value
*Or34
= Builder
.CreateShl(Or31
, Builder
.getIntN(128, 64));
531 Value
*Temp3
= Builder
.CreateShl(Builder
.getIntN(128, 1),
532 Builder
.getIntN(128, FPMantissaWidth
));
533 Value
*Temp4
= Builder
.CreateSub(Temp3
, Builder
.getIntN(128, 1));
534 Value
*A6
= Builder
.CreateAnd(AAddr1Off0
, Temp4
);
535 Or35
= Builder
.CreateOr(Or34
, A6
);
537 Value
*Or31
= Builder
.CreateOr(And34
, And29
);
538 Or35
= Builder
.CreateOr(IsSigned
? Or31
: And34
, Shl30
);
541 if (IToFP
->getType()->isDoubleTy()) {
542 Value
*ZExt1
= Builder
.CreateZExt(Or35
, Builder
.getIntNTy(FloatWidth
));
543 Value
*Shl1
= Builder
.CreateShl(ZExt1
, Builder
.getIntN(FloatWidth
, 32));
545 Builder
.CreateAnd(AAddr1Off0
, Builder
.getIntN(FloatWidth
, 0xFFFFFFFF));
546 Value
*Or1
= Builder
.CreateOr(Shl1
, And1
);
547 A4
= Builder
.CreateBitCast(Or1
, IToFP
->getType());
548 } else if (IToFP
->getType()->isX86_FP80Ty()) {
550 Builder
.CreateBitCast(Or35
, Type::getFP128Ty(Builder
.getContext()));
551 A4
= Builder
.CreateFPTrunc(A40
, IToFP
->getType());
552 } else if (IToFP
->getType()->isHalfTy() || IToFP
->getType()->isBFloatTy()) {
553 // Deal with "half" situation. This is a workaround since we don't have
554 // floattihf.c currently as referring.
556 Builder
.CreateBitCast(Or35
, Type::getFloatTy(Builder
.getContext()));
557 A4
= Builder
.CreateFPTrunc(A40
, IToFP
->getType());
559 A4
= Builder
.CreateBitCast(Or35
, IToFP
->getType());
560 Builder
.CreateBr(End
);
563 Builder
.SetInsertPoint(End
, End
->begin());
564 PHINode
*Retval0
= Builder
.CreatePHI(IToFP
->getType(), 2);
565 Retval0
->addIncoming(A4
, IfEnd26
);
566 Retval0
->addIncoming(ConstantFP::getZero(IToFP
->getType(), false), Entry
);
568 IToFP
->replaceAllUsesWith(Retval0
);
569 IToFP
->dropAllReferences();
570 IToFP
->eraseFromParent();
573 static void scalarize(Instruction
*I
, SmallVectorImpl
<Instruction
*> &Replace
) {
574 VectorType
*VTy
= cast
<FixedVectorType
>(I
->getType());
576 IRBuilder
<> Builder(I
);
578 unsigned NumElements
= VTy
->getElementCount().getFixedValue();
579 Value
*Result
= PoisonValue::get(VTy
);
580 for (unsigned Idx
= 0; Idx
< NumElements
; ++Idx
) {
581 Value
*Ext
= Builder
.CreateExtractElement(I
->getOperand(0), Idx
);
582 Value
*Cast
= Builder
.CreateCast(cast
<CastInst
>(I
)->getOpcode(), Ext
,
583 I
->getType()->getScalarType());
584 Result
= Builder
.CreateInsertElement(Result
, Cast
, Idx
);
585 if (isa
<Instruction
>(Cast
))
586 Replace
.push_back(cast
<Instruction
>(Cast
));
588 I
->replaceAllUsesWith(Result
);
589 I
->dropAllReferences();
590 I
->eraseFromParent();
593 static bool runImpl(Function
&F
, const TargetLowering
&TLI
) {
594 SmallVector
<Instruction
*, 4> Replace
;
595 SmallVector
<Instruction
*, 4> ReplaceVector
;
596 bool Modified
= false;
598 unsigned MaxLegalFpConvertBitWidth
=
599 TLI
.getMaxLargeFPConvertBitWidthSupported();
600 if (ExpandFpConvertBits
!= llvm::IntegerType::MAX_INT_BITS
)
601 MaxLegalFpConvertBitWidth
= ExpandFpConvertBits
;
603 if (MaxLegalFpConvertBitWidth
>= llvm::IntegerType::MAX_INT_BITS
)
606 for (auto &I
: instructions(F
)) {
607 switch (I
.getOpcode()) {
608 case Instruction::FPToUI
:
609 case Instruction::FPToSI
: {
610 // TODO: This pass doesn't handle scalable vectors.
611 if (I
.getOperand(0)->getType()->isScalableTy())
614 auto *IntTy
= cast
<IntegerType
>(I
.getType()->getScalarType());
615 if (IntTy
->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth
)
618 if (I
.getOperand(0)->getType()->isVectorTy())
619 ReplaceVector
.push_back(&I
);
621 Replace
.push_back(&I
);
625 case Instruction::UIToFP
:
626 case Instruction::SIToFP
: {
627 // TODO: This pass doesn't handle scalable vectors.
628 if (I
.getOperand(0)->getType()->isScalableTy())
632 cast
<IntegerType
>(I
.getOperand(0)->getType()->getScalarType());
633 if (IntTy
->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth
)
636 if (I
.getOperand(0)->getType()->isVectorTy())
637 ReplaceVector
.push_back(&I
);
639 Replace
.push_back(&I
);
648 while (!ReplaceVector
.empty()) {
649 Instruction
*I
= ReplaceVector
.pop_back_val();
650 scalarize(I
, Replace
);
656 while (!Replace
.empty()) {
657 Instruction
*I
= Replace
.pop_back_val();
658 if (I
->getOpcode() == Instruction::FPToUI
||
659 I
->getOpcode() == Instruction::FPToSI
) {
670 class ExpandLargeFpConvertLegacyPass
: public FunctionPass
{
674 ExpandLargeFpConvertLegacyPass() : FunctionPass(ID
) {
675 initializeExpandLargeFpConvertLegacyPassPass(
676 *PassRegistry::getPassRegistry());
679 bool runOnFunction(Function
&F
) override
{
680 auto *TM
= &getAnalysis
<TargetPassConfig
>().getTM
<TargetMachine
>();
681 auto *TLI
= TM
->getSubtargetImpl(F
)->getTargetLowering();
682 return runImpl(F
, *TLI
);
685 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
686 AU
.addRequired
<TargetPassConfig
>();
687 AU
.addPreserved
<AAResultsWrapperPass
>();
688 AU
.addPreserved
<GlobalsAAWrapperPass
>();
693 PreservedAnalyses
ExpandLargeFpConvertPass::run(Function
&F
,
694 FunctionAnalysisManager
&FAM
) {
695 const TargetSubtargetInfo
*STI
= TM
->getSubtargetImpl(F
);
696 return runImpl(F
, *STI
->getTargetLowering()) ? PreservedAnalyses::none()
697 : PreservedAnalyses::all();
700 char ExpandLargeFpConvertLegacyPass::ID
= 0;
701 INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass
, "expand-large-fp-convert",
702 "Expand large fp convert", false, false)
703 INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass
, "expand-large-fp-convert",
704 "Expand large fp convert", false, false)
706 FunctionPass
*llvm::createExpandLargeFpConvertPass() {
707 return new ExpandLargeFpConvertLegacyPass();