1 //===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 // This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
11 // ‘sitofp .. to’ instructions with a bitwidth above a threshold into
12 // auto-generated functions. This is useful for targets like x86_64 that cannot
13 // lower fp conversions with more than 128 bits.
15 //===----------------------------------------------------------------------===//
17 #include "llvm/CodeGen/ExpandLargeFpConvert.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/Analysis/GlobalsModRef.h"
20 #include "llvm/CodeGen/Passes.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/CodeGen/TargetPassConfig.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/InstIterator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/InitializePasses.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Target/TargetMachine.h"
34 static cl::opt
<unsigned>
35 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden
,
36 cl::init(llvm::IntegerType::MAX_INT_BITS
),
37 cl::desc("fp convert instructions on integers with "
38 "more than <N> bits are expanded."));
40 /// Generate code to convert a fp number to integer, replacing FPToS(U)I with
41 /// the generated code. This currently generates code similarly to compiler-rt's implementations.
44 /// An example IR generated from compiler-rt/fixsfdi.c looks like below:
45 /// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
47 /// %0 = bitcast float %a to i32
48 /// %conv.i = zext i32 %0 to i64
49 /// %tobool.not = icmp sgt i32 %0, -1
50 /// %conv = select i1 %tobool.not, i64 1, i64 -1
51 /// %and = lshr i64 %conv.i, 23
52 /// %shr = and i64 %and, 255
53 /// %and2 = and i64 %conv.i, 8388607
54 /// %or = or i64 %and2, 8388608
55 /// %cmp = icmp ult i64 %shr, 127
56 /// br i1 %cmp, label %cleanup, label %if.end
58 /// if.end: ; preds = %entry
59 /// %sub = add nuw nsw i64 %shr, 4294967169
60 /// %conv5 = and i64 %sub, 4294967232
61 /// %cmp6.not = icmp eq i64 %conv5, 0
62 /// br i1 %cmp6.not, label %if.end12, label %if.then8
64 /// if.then8: ; preds = %if.end
65 /// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
68 /// if.end12: ; preds = %if.end
69 /// %cmp13 = icmp ult i64 %shr, 150
70 /// br i1 %cmp13, label %if.then15, label %if.else
72 /// if.then15: ; preds = %if.end12
73 /// %sub16 = sub nuw nsw i64 150, %shr
74 /// %shr17 = lshr i64 %or, %sub16
75 /// %mul = mul nsw i64 %shr17, %conv
78 /// if.else: ; preds = %if.end12
79 /// %sub18 = add nsw i64 %shr, -150
80 /// %shl = shl i64 %or, %sub18
81 /// %mul19 = mul nsw i64 %shl, %conv
84 /// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
85 /// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
89 /// Replace fp to integer with generated code.
90 static void expandFPToI(Instruction
*FPToI
) {
91 IRBuilder
<> Builder(FPToI
);
92 auto *FloatVal
= FPToI
->getOperand(0);
93 IntegerType
*IntTy
= cast
<IntegerType
>(FPToI
->getType());
95 unsigned BitWidth
= FPToI
->getType()->getIntegerBitWidth();
96 unsigned FPMantissaWidth
= FloatVal
->getType()->getFPMantissaWidth() - 1;
98 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
99 // to i32 first following a sext/zext to target integer type.
101 if (FloatVal
->getType()->isHalfTy()) {
102 if (FPToI
->getOpcode() == Instruction::FPToUI
) {
103 Value
*A0
= Builder
.CreateFPToUI(FloatVal
, Builder
.getIntNTy(32));
104 A1
= Builder
.CreateZExt(A0
, IntTy
);
106 Value
*A0
= Builder
.CreateFPToSI(FloatVal
, Builder
.getIntNTy(32));
107 A1
= Builder
.CreateSExt(A0
, IntTy
);
109 FPToI
->replaceAllUsesWith(A1
);
110 FPToI
->dropAllReferences();
111 FPToI
->eraseFromParent();
115 // fp80 conversion is implemented by fpext to fp128 first then do the
117 FPMantissaWidth
= FPMantissaWidth
== 63 ? 112 : FPMantissaWidth
;
118 unsigned FloatWidth
=
119 PowerOf2Ceil(FloatVal
->getType()->getScalarSizeInBits());
120 unsigned ExponentWidth
= FloatWidth
- FPMantissaWidth
- 1;
121 unsigned ExponentBias
= (1 << (ExponentWidth
- 1)) - 1;
122 Value
*ImplicitBit
= Builder
.CreateShl(
123 Builder
.getIntN(BitWidth
, 1), Builder
.getIntN(BitWidth
, FPMantissaWidth
));
124 Value
*SignificandMask
=
125 Builder
.CreateSub(ImplicitBit
, Builder
.getIntN(BitWidth
, 1));
126 Value
*NegOne
= Builder
.CreateSExt(
127 ConstantInt::getSigned(Builder
.getInt32Ty(), -1), IntTy
);
129 Builder
.CreateShl(ConstantInt::getSigned(IntTy
, 1),
130 ConstantInt::getSigned(IntTy
, BitWidth
- 1));
132 BasicBlock
*Entry
= Builder
.GetInsertBlock();
133 Function
*F
= Entry
->getParent();
134 Entry
->setName(Twine(Entry
->getName(), "fp-to-i-entry"));
136 Entry
->splitBasicBlock(Builder
.GetInsertPoint(), "fp-to-i-cleanup");
138 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-end", F
, End
);
139 BasicBlock
*IfThen5
=
140 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-then5", F
, End
);
142 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-end9", F
, End
);
143 BasicBlock
*IfThen12
=
144 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-then12", F
, End
);
146 BasicBlock::Create(Builder
.getContext(), "fp-to-i-if-else", F
, End
);
148 Entry
->getTerminator()->eraseFromParent();
151 Builder
.SetInsertPoint(Entry
);
152 Value
*FloatVal0
= FloatVal
;
153 // fp80 conversion is implemented by fpext to fp128 first then do the
155 if (FloatVal
->getType()->isX86_FP80Ty())
157 Builder
.CreateFPExt(FloatVal
, Type::getFP128Ty(Builder
.getContext()));
159 Builder
.CreateBitCast(FloatVal0
, Builder
.getIntNTy(FloatWidth
));
160 Value
*ARep
= Builder
.CreateZExt(ARep0
, FPToI
->getType());
161 Value
*PosOrNeg
= Builder
.CreateICmpSGT(
162 ARep0
, ConstantInt::getSigned(Builder
.getIntNTy(FloatWidth
), -1));
163 Value
*Sign
= Builder
.CreateSelect(PosOrNeg
, ConstantInt::getSigned(IntTy
, 1),
164 ConstantInt::getSigned(IntTy
, -1));
166 Builder
.CreateLShr(ARep
, Builder
.getIntN(BitWidth
, FPMantissaWidth
));
167 Value
*And2
= Builder
.CreateAnd(
168 And
, Builder
.getIntN(BitWidth
, (1 << ExponentWidth
) - 1));
169 Value
*Abs
= Builder
.CreateAnd(ARep
, SignificandMask
);
170 Value
*Or
= Builder
.CreateOr(Abs
, ImplicitBit
);
172 Builder
.CreateICmpULT(And2
, Builder
.getIntN(BitWidth
, ExponentBias
));
173 Builder
.CreateCondBr(Cmp
, End
, IfEnd
);
176 Builder
.SetInsertPoint(IfEnd
);
177 Value
*Add1
= Builder
.CreateAdd(
178 And2
, ConstantInt::getSigned(
179 IntTy
, -static_cast<int64_t>(ExponentBias
+ BitWidth
)));
180 Value
*Cmp3
= Builder
.CreateICmpULT(
181 Add1
, ConstantInt::getSigned(IntTy
, -static_cast<int64_t>(BitWidth
)));
182 Builder
.CreateCondBr(Cmp3
, IfThen5
, IfEnd9
);
185 Builder
.SetInsertPoint(IfThen5
);
186 Value
*PosInf
= Builder
.CreateXor(NegOne
, NegInf
);
187 Value
*Cond8
= Builder
.CreateSelect(PosOrNeg
, PosInf
, NegInf
);
188 Builder
.CreateBr(End
);
191 Builder
.SetInsertPoint(IfEnd9
);
192 Value
*Cmp10
= Builder
.CreateICmpULT(
193 And2
, Builder
.getIntN(BitWidth
, ExponentBias
+ FPMantissaWidth
));
194 Builder
.CreateCondBr(Cmp10
, IfThen12
, IfElse
);
197 Builder
.SetInsertPoint(IfThen12
);
198 Value
*Sub13
= Builder
.CreateSub(
199 Builder
.getIntN(BitWidth
, ExponentBias
+ FPMantissaWidth
), And2
);
200 Value
*Shr14
= Builder
.CreateLShr(Or
, Sub13
);
201 Value
*Mul
= Builder
.CreateMul(Shr14
, Sign
);
202 Builder
.CreateBr(End
);
205 Builder
.SetInsertPoint(IfElse
);
206 Value
*Sub15
= Builder
.CreateAdd(
207 And2
, ConstantInt::getSigned(
208 IntTy
, -static_cast<int64_t>(ExponentBias
+ FPMantissaWidth
)));
209 Value
*Shl
= Builder
.CreateShl(Or
, Sub15
);
210 Value
*Mul16
= Builder
.CreateMul(Shl
, Sign
);
211 Builder
.CreateBr(End
);
214 Builder
.SetInsertPoint(End
, End
->begin());
215 PHINode
*Retval0
= Builder
.CreatePHI(FPToI
->getType(), 4);
217 Retval0
->addIncoming(Cond8
, IfThen5
);
218 Retval0
->addIncoming(Mul
, IfThen12
);
219 Retval0
->addIncoming(Mul16
, IfElse
);
220 Retval0
->addIncoming(Builder
.getIntN(BitWidth
, 0), Entry
);
222 FPToI
->replaceAllUsesWith(Retval0
);
223 FPToI
->dropAllReferences();
224 FPToI
->eraseFromParent();
227 /// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
228 /// the generated code. This currently generates code similarly to compiler-rt's
229 /// implementations. This implementation has an implicit assumption that integer
230 /// width is larger than fp.
232 /// An example IR generated from compiler-rt/floatdisf.c looks like below:
233 /// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
235 /// %cmp = icmp eq i64 %a, 0
236 /// br i1 %cmp, label %return, label %if.end
238 /// if.end: ; preds = %entry
239 /// %shr = ashr i64 %a, 63
240 /// %xor = xor i64 %shr, %a
241 /// %sub = sub nsw i64 %xor, %shr
242 /// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
243 /// %cast = trunc i64 %0 to i32
244 /// %sub1 = sub nuw nsw i32 64, %cast
245 /// %sub2 = xor i32 %cast, 63
246 /// %cmp3 = icmp ult i32 %cast, 40
247 /// br i1 %cmp3, label %if.then4, label %if.else
249 /// if.then4: ; preds = %if.end
250 /// switch i32 %sub1, label %sw.default [
251 /// i32 25, label %sw.bb
252 /// i32 26, label %sw.epilog
255 /// sw.bb: ; preds = %if.then4
256 /// %shl = shl i64 %sub, 1
257 /// br label %sw.epilog
259 /// sw.default: ; preds = %if.then4
260 /// %sub5 = sub nsw i64 38, %0
261 /// %sh_prom = and i64 %sub5, 4294967295
262 /// %shr6 = lshr i64 %sub, %sh_prom
263 /// %shr9 = lshr i64 274877906943, %0
264 /// %and = and i64 %shr9, %sub
265 /// %cmp10 = icmp ne i64 %and, 0
266 /// %conv11 = zext i1 %cmp10 to i64
267 /// %or = or i64 %shr6, %conv11
268 /// br label %sw.epilog
270 /// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
271 /// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
272 /// %1 = lshr i64 %a.addr.0, 2
273 /// %2 = and i64 %1, 1
274 /// %or16 = or i64 %2, %a.addr.0
275 /// %inc = add nsw i64 %or16, 1
276 /// %3 = and i64 %inc, 67108864
277 /// %tobool.not = icmp eq i64 %3, 0
278 /// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
279 /// %spec.select = ashr i64 %inc, %spec.select.v
280 /// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
281 /// br label %if.end26
283 /// if.else: ; preds = %if.end
284 /// %sub23 = add nuw nsw i64 %0, 4294967256
285 /// %sh_prom24 = and i64 %sub23, 4294967295
286 /// %shl25 = shl i64 %sub, %sh_prom24
287 /// br label %if.end26
289 /// if.end26: ; preds = %sw.epilog, %if.else
290 /// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
291 /// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
292 /// %conv27 = trunc i64 %shr to i32
293 /// %and28 = and i32 %conv27, -2147483648
294 /// %add = shl nuw nsw i32 %e.0, 23
295 /// %shl29 = add nuw nsw i32 %add, 1065353216
296 /// %conv31 = trunc i64 %a.addr.1 to i32
297 /// %and32 = and i32 %conv31, 8388607
298 /// %or30 = or i32 %and32, %and28
299 /// %or33 = or i32 %or30, %shl29
300 /// %4 = bitcast i32 %or33 to float
303 /// return: ; preds = %entry, %if.end26
304 /// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
305 /// ret float %retval.0
308 /// Replace integer to fp with generated code.
309 static void expandIToFP(Instruction
*IToFP
) {
310 IRBuilder
<> Builder(IToFP
);
311 auto *IntVal
= IToFP
->getOperand(0);
312 IntegerType
*IntTy
= cast
<IntegerType
>(IntVal
->getType());
314 unsigned BitWidth
= IntVal
->getType()->getIntegerBitWidth();
315 unsigned FPMantissaWidth
= IToFP
->getType()->getFPMantissaWidth() - 1;
316 // fp80 conversion is implemented by conversion tp fp128 first following
317 // a fptrunc to fp80.
318 FPMantissaWidth
= FPMantissaWidth
== 63 ? 112 : FPMantissaWidth
;
319 // FIXME: As there is no related builtins added in compliler-rt,
320 // here currently utilized the fp32 <-> fp16 lib calls to implement.
321 FPMantissaWidth
= FPMantissaWidth
== 10 ? 23 : FPMantissaWidth
;
322 FPMantissaWidth
= FPMantissaWidth
== 7 ? 23 : FPMantissaWidth
;
323 unsigned FloatWidth
= PowerOf2Ceil(FPMantissaWidth
);
324 bool IsSigned
= IToFP
->getOpcode() == Instruction::SIToFP
;
326 assert(BitWidth
> FloatWidth
&& "Unexpected conversion. expandIToFP() "
327 "assumes integer width is larger than fp.");
330 Builder
.CreateShl(Builder
.getIntN(BitWidth
, 1),
331 Builder
.getIntN(BitWidth
, FPMantissaWidth
+ 3));
333 BasicBlock
*Entry
= Builder
.GetInsertBlock();
334 Function
*F
= Entry
->getParent();
335 Entry
->setName(Twine(Entry
->getName(), "itofp-entry"));
337 Entry
->splitBasicBlock(Builder
.GetInsertPoint(), "itofp-return");
339 BasicBlock::Create(Builder
.getContext(), "itofp-if-end", F
, End
);
340 BasicBlock
*IfThen4
=
341 BasicBlock::Create(Builder
.getContext(), "itofp-if-then4", F
, End
);
343 BasicBlock::Create(Builder
.getContext(), "itofp-sw-bb", F
, End
);
344 BasicBlock
*SwDefault
=
345 BasicBlock::Create(Builder
.getContext(), "itofp-sw-default", F
, End
);
346 BasicBlock
*SwEpilog
=
347 BasicBlock::Create(Builder
.getContext(), "itofp-sw-epilog", F
, End
);
348 BasicBlock
*IfThen20
=
349 BasicBlock::Create(Builder
.getContext(), "itofp-if-then20", F
, End
);
351 BasicBlock::Create(Builder
.getContext(), "itofp-if-else", F
, End
);
352 BasicBlock
*IfEnd26
=
353 BasicBlock::Create(Builder
.getContext(), "itofp-if-end26", F
, End
);
355 Entry
->getTerminator()->eraseFromParent();
358 Intrinsic::getOrInsertDeclaration(F
->getParent(), Intrinsic::ctlz
, IntTy
);
359 ConstantInt
*True
= Builder
.getTrue();
362 Builder
.SetInsertPoint(Entry
);
363 Value
*Cmp
= Builder
.CreateICmpEQ(IntVal
, ConstantInt::getSigned(IntTy
, 0));
364 Builder
.CreateCondBr(Cmp
, End
, IfEnd
);
367 Builder
.SetInsertPoint(IfEnd
);
369 Builder
.CreateAShr(IntVal
, Builder
.getIntN(BitWidth
, BitWidth
- 1));
370 Value
*Xor
= Builder
.CreateXor(Shr
, IntVal
);
371 Value
*Sub
= Builder
.CreateSub(Xor
, Shr
);
372 Value
*Call
= Builder
.CreateCall(CTLZ
, {IsSigned
? Sub
: IntVal
, True
});
373 Value
*Cast
= Builder
.CreateTrunc(Call
, Builder
.getInt32Ty());
374 int BitWidthNew
= FloatWidth
== 128 ? BitWidth
: 32;
375 Value
*Sub1
= Builder
.CreateSub(Builder
.getIntN(BitWidthNew
, BitWidth
),
376 FloatWidth
== 128 ? Call
: Cast
);
377 Value
*Sub2
= Builder
.CreateSub(Builder
.getIntN(BitWidthNew
, BitWidth
- 1),
378 FloatWidth
== 128 ? Call
: Cast
);
379 Value
*Cmp3
= Builder
.CreateICmpSGT(
380 Sub1
, Builder
.getIntN(BitWidthNew
, FPMantissaWidth
+ 1));
381 Builder
.CreateCondBr(Cmp3
, IfThen4
, IfElse
);
384 Builder
.SetInsertPoint(IfThen4
);
385 llvm::SwitchInst
*SI
= Builder
.CreateSwitch(Sub1
, SwDefault
);
386 SI
->addCase(Builder
.getIntN(BitWidthNew
, FPMantissaWidth
+ 2), SwBB
);
387 SI
->addCase(Builder
.getIntN(BitWidthNew
, FPMantissaWidth
+ 3), SwEpilog
);
390 Builder
.SetInsertPoint(SwBB
);
392 Builder
.CreateShl(IsSigned
? Sub
: IntVal
, Builder
.getIntN(BitWidth
, 1));
393 Builder
.CreateBr(SwEpilog
);
396 Builder
.SetInsertPoint(SwDefault
);
397 Value
*Sub5
= Builder
.CreateSub(
398 Builder
.getIntN(BitWidthNew
, BitWidth
- FPMantissaWidth
- 3),
399 FloatWidth
== 128 ? Call
: Cast
);
400 Value
*ShProm
= Builder
.CreateZExt(Sub5
, IntTy
);
401 Value
*Shr6
= Builder
.CreateLShr(IsSigned
? Sub
: IntVal
,
402 FloatWidth
== 128 ? Sub5
: ShProm
);
404 Builder
.CreateAdd(FloatWidth
== 128 ? Call
: Cast
,
405 Builder
.getIntN(BitWidthNew
, FPMantissaWidth
+ 3));
406 Value
*ShProm9
= Builder
.CreateZExt(Sub8
, IntTy
);
407 Value
*Shr9
= Builder
.CreateLShr(ConstantInt::getSigned(IntTy
, -1),
408 FloatWidth
== 128 ? Sub8
: ShProm9
);
409 Value
*And
= Builder
.CreateAnd(Shr9
, IsSigned
? Sub
: IntVal
);
410 Value
*Cmp10
= Builder
.CreateICmpNE(And
, Builder
.getIntN(BitWidth
, 0));
411 Value
*Conv11
= Builder
.CreateZExt(Cmp10
, IntTy
);
412 Value
*Or
= Builder
.CreateOr(Shr6
, Conv11
);
413 Builder
.CreateBr(SwEpilog
);
416 Builder
.SetInsertPoint(SwEpilog
);
417 PHINode
*AAddr0
= Builder
.CreatePHI(IntTy
, 3);
418 AAddr0
->addIncoming(Or
, SwDefault
);
419 AAddr0
->addIncoming(IsSigned
? Sub
: IntVal
, IfThen4
);
420 AAddr0
->addIncoming(Shl
, SwBB
);
421 Value
*A0
= Builder
.CreateTrunc(AAddr0
, Builder
.getInt32Ty());
422 Value
*A1
= Builder
.CreateLShr(A0
, Builder
.getIntN(32, 2));
423 Value
*A2
= Builder
.CreateAnd(A1
, Builder
.getIntN(32, 1));
424 Value
*Conv16
= Builder
.CreateZExt(A2
, IntTy
);
425 Value
*Or17
= Builder
.CreateOr(AAddr0
, Conv16
);
426 Value
*Inc
= Builder
.CreateAdd(Or17
, Builder
.getIntN(BitWidth
, 1));
427 Value
*Shr18
= nullptr;
429 Shr18
= Builder
.CreateAShr(Inc
, Builder
.getIntN(BitWidth
, 2));
431 Shr18
= Builder
.CreateLShr(Inc
, Builder
.getIntN(BitWidth
, 2));
432 Value
*A3
= Builder
.CreateAnd(Inc
, Temp1
, "a3");
433 Value
*PosOrNeg
= Builder
.CreateICmpEQ(A3
, Builder
.getIntN(BitWidth
, 0));
434 Value
*ExtractT60
= Builder
.CreateTrunc(Shr18
, Builder
.getIntNTy(FloatWidth
));
435 Value
*Extract63
= Builder
.CreateLShr(Shr18
, Builder
.getIntN(BitWidth
, 32));
436 Value
*ExtractT64
= nullptr;
438 ExtractT64
= Builder
.CreateTrunc(Sub2
, Builder
.getInt64Ty());
440 ExtractT64
= Builder
.CreateTrunc(Extract63
, Builder
.getInt32Ty());
441 Builder
.CreateCondBr(PosOrNeg
, IfEnd26
, IfThen20
);
444 Builder
.SetInsertPoint(IfThen20
);
445 Value
*Shr21
= nullptr;
447 Shr21
= Builder
.CreateAShr(Inc
, Builder
.getIntN(BitWidth
, 3));
449 Shr21
= Builder
.CreateLShr(Inc
, Builder
.getIntN(BitWidth
, 3));
450 Value
*ExtractT
= Builder
.CreateTrunc(Shr21
, Builder
.getIntNTy(FloatWidth
));
451 Value
*Extract
= Builder
.CreateLShr(Shr21
, Builder
.getIntN(BitWidth
, 32));
452 Value
*ExtractT62
= nullptr;
454 ExtractT62
= Builder
.CreateTrunc(Sub1
, Builder
.getIntNTy(64));
456 ExtractT62
= Builder
.CreateTrunc(Extract
, Builder
.getIntNTy(32));
457 Builder
.CreateBr(IfEnd26
);
460 Builder
.SetInsertPoint(IfElse
);
461 Value
*Sub24
= Builder
.CreateAdd(
462 FloatWidth
== 128 ? Call
: Cast
,
463 ConstantInt::getSigned(Builder
.getIntNTy(BitWidthNew
),
464 -(BitWidth
- FPMantissaWidth
- 1)));
465 Value
*ShProm25
= Builder
.CreateZExt(Sub24
, IntTy
);
466 Value
*Shl26
= Builder
.CreateShl(IsSigned
? Sub
: IntVal
,
467 FloatWidth
== 128 ? Sub24
: ShProm25
);
468 Value
*ExtractT61
= Builder
.CreateTrunc(Shl26
, Builder
.getIntNTy(FloatWidth
));
469 Value
*Extract65
= Builder
.CreateLShr(Shl26
, Builder
.getIntN(BitWidth
, 32));
470 Value
*ExtractT66
= nullptr;
472 ExtractT66
= Builder
.CreateTrunc(Sub2
, Builder
.getIntNTy(64));
474 ExtractT66
= Builder
.CreateTrunc(Extract65
, Builder
.getInt32Ty());
475 Builder
.CreateBr(IfEnd26
);
478 Builder
.SetInsertPoint(IfEnd26
);
479 PHINode
*AAddr1Off0
= Builder
.CreatePHI(Builder
.getIntNTy(FloatWidth
), 3);
480 AAddr1Off0
->addIncoming(ExtractT
, IfThen20
);
481 AAddr1Off0
->addIncoming(ExtractT60
, SwEpilog
);
482 AAddr1Off0
->addIncoming(ExtractT61
, IfElse
);
483 PHINode
*AAddr1Off32
= nullptr;
484 if (FloatWidth
> 32) {
486 Builder
.CreatePHI(Builder
.getIntNTy(FloatWidth
> 80 ? 64 : 32), 3);
487 AAddr1Off32
->addIncoming(ExtractT62
, IfThen20
);
488 AAddr1Off32
->addIncoming(ExtractT64
, SwEpilog
);
489 AAddr1Off32
->addIncoming(ExtractT66
, IfElse
);
491 PHINode
*E0
= nullptr;
492 if (FloatWidth
<= 80) {
493 E0
= Builder
.CreatePHI(Builder
.getIntNTy(BitWidthNew
), 3);
494 E0
->addIncoming(Sub1
, IfThen20
);
495 E0
->addIncoming(Sub2
, SwEpilog
);
496 E0
->addIncoming(Sub2
, IfElse
);
498 Value
*And29
= nullptr;
499 if (FloatWidth
> 80) {
500 Value
*Temp2
= Builder
.CreateShl(Builder
.getIntN(BitWidth
, 1),
501 Builder
.getIntN(BitWidth
, 63));
502 And29
= Builder
.CreateAnd(Shr
, Temp2
, "and29");
504 Value
*Conv28
= Builder
.CreateTrunc(Shr
, Builder
.getIntNTy(32));
505 And29
= Builder
.CreateAnd(
506 Conv28
, ConstantInt::getSigned(Builder
.getIntNTy(32), 0x80000000));
508 unsigned TempMod
= FPMantissaWidth
% 32;
509 Value
*And34
= nullptr;
510 Value
*Shl30
= nullptr;
511 if (FloatWidth
> 80) {
513 Value
*Add
= Builder
.CreateShl(AAddr1Off32
, Builder
.getIntN(64, TempMod
));
514 Shl30
= Builder
.CreateAdd(
516 Builder
.getIntN(64, ((1ull << (62ull - TempMod
)) - 1ull) << TempMod
));
517 And34
= Builder
.CreateZExt(Shl30
, Builder
.getIntNTy(128));
519 Value
*Add
= Builder
.CreateShl(E0
, Builder
.getIntN(32, TempMod
));
520 Shl30
= Builder
.CreateAdd(
521 Add
, Builder
.getIntN(32, ((1 << (30 - TempMod
)) - 1) << TempMod
));
522 And34
= Builder
.CreateAnd(FloatWidth
> 32 ? AAddr1Off32
: AAddr1Off0
,
523 Builder
.getIntN(32, (1 << TempMod
) - 1));
525 Value
*Or35
= nullptr;
526 if (FloatWidth
> 80) {
527 Value
*And29Trunc
= Builder
.CreateTrunc(And29
, Builder
.getIntNTy(128));
528 Value
*Or31
= Builder
.CreateOr(And29Trunc
, And34
);
529 Value
*Or34
= Builder
.CreateShl(Or31
, Builder
.getIntN(128, 64));
530 Value
*Temp3
= Builder
.CreateShl(Builder
.getIntN(128, 1),
531 Builder
.getIntN(128, FPMantissaWidth
));
532 Value
*Temp4
= Builder
.CreateSub(Temp3
, Builder
.getIntN(128, 1));
533 Value
*A6
= Builder
.CreateAnd(AAddr1Off0
, Temp4
);
534 Or35
= Builder
.CreateOr(Or34
, A6
);
536 Value
*Or31
= Builder
.CreateOr(And34
, And29
);
537 Or35
= Builder
.CreateOr(IsSigned
? Or31
: And34
, Shl30
);
540 if (IToFP
->getType()->isDoubleTy()) {
541 Value
*ZExt1
= Builder
.CreateZExt(Or35
, Builder
.getIntNTy(FloatWidth
));
542 Value
*Shl1
= Builder
.CreateShl(ZExt1
, Builder
.getIntN(FloatWidth
, 32));
544 Builder
.CreateAnd(AAddr1Off0
, Builder
.getIntN(FloatWidth
, 0xFFFFFFFF));
545 Value
*Or1
= Builder
.CreateOr(Shl1
, And1
);
546 A4
= Builder
.CreateBitCast(Or1
, IToFP
->getType());
547 } else if (IToFP
->getType()->isX86_FP80Ty()) {
549 Builder
.CreateBitCast(Or35
, Type::getFP128Ty(Builder
.getContext()));
550 A4
= Builder
.CreateFPTrunc(A40
, IToFP
->getType());
551 } else if (IToFP
->getType()->isHalfTy() || IToFP
->getType()->isBFloatTy()) {
552 // Deal with "half" situation. This is a workaround since we don't have
553 // floattihf.c currently as referring.
555 Builder
.CreateBitCast(Or35
, Type::getFloatTy(Builder
.getContext()));
556 A4
= Builder
.CreateFPTrunc(A40
, IToFP
->getType());
558 A4
= Builder
.CreateBitCast(Or35
, IToFP
->getType());
559 Builder
.CreateBr(End
);
562 Builder
.SetInsertPoint(End
, End
->begin());
563 PHINode
*Retval0
= Builder
.CreatePHI(IToFP
->getType(), 2);
564 Retval0
->addIncoming(A4
, IfEnd26
);
565 Retval0
->addIncoming(ConstantFP::getZero(IToFP
->getType(), false), Entry
);
567 IToFP
->replaceAllUsesWith(Retval0
);
568 IToFP
->dropAllReferences();
569 IToFP
->eraseFromParent();
572 static void scalarize(Instruction
*I
, SmallVectorImpl
<Instruction
*> &Replace
) {
573 VectorType
*VTy
= cast
<FixedVectorType
>(I
->getType());
575 IRBuilder
<> Builder(I
);
577 unsigned NumElements
= VTy
->getElementCount().getFixedValue();
578 Value
*Result
= PoisonValue::get(VTy
);
579 for (unsigned Idx
= 0; Idx
< NumElements
; ++Idx
) {
580 Value
*Ext
= Builder
.CreateExtractElement(I
->getOperand(0), Idx
);
581 Value
*Cast
= Builder
.CreateCast(cast
<CastInst
>(I
)->getOpcode(), Ext
,
582 I
->getType()->getScalarType());
583 Result
= Builder
.CreateInsertElement(Result
, Cast
, Idx
);
584 if (isa
<Instruction
>(Cast
))
585 Replace
.push_back(cast
<Instruction
>(Cast
));
587 I
->replaceAllUsesWith(Result
);
588 I
->dropAllReferences();
589 I
->eraseFromParent();
592 static bool runImpl(Function
&F
, const TargetLowering
&TLI
) {
593 SmallVector
<Instruction
*, 4> Replace
;
594 SmallVector
<Instruction
*, 4> ReplaceVector
;
595 bool Modified
= false;
597 unsigned MaxLegalFpConvertBitWidth
=
598 TLI
.getMaxLargeFPConvertBitWidthSupported();
599 if (ExpandFpConvertBits
!= llvm::IntegerType::MAX_INT_BITS
)
600 MaxLegalFpConvertBitWidth
= ExpandFpConvertBits
;
602 if (MaxLegalFpConvertBitWidth
>= llvm::IntegerType::MAX_INT_BITS
)
605 for (auto &I
: instructions(F
)) {
606 switch (I
.getOpcode()) {
607 case Instruction::FPToUI
:
608 case Instruction::FPToSI
: {
609 // TODO: This pass doesn't handle scalable vectors.
610 if (I
.getOperand(0)->getType()->isScalableTy())
613 auto *IntTy
= cast
<IntegerType
>(I
.getType()->getScalarType());
614 if (IntTy
->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth
)
617 if (I
.getOperand(0)->getType()->isVectorTy())
618 ReplaceVector
.push_back(&I
);
620 Replace
.push_back(&I
);
624 case Instruction::UIToFP
:
625 case Instruction::SIToFP
: {
626 // TODO: This pass doesn't handle scalable vectors.
627 if (I
.getOperand(0)->getType()->isScalableTy())
631 cast
<IntegerType
>(I
.getOperand(0)->getType()->getScalarType());
632 if (IntTy
->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth
)
635 if (I
.getOperand(0)->getType()->isVectorTy())
636 ReplaceVector
.push_back(&I
);
638 Replace
.push_back(&I
);
647 while (!ReplaceVector
.empty()) {
648 Instruction
*I
= ReplaceVector
.pop_back_val();
649 scalarize(I
, Replace
);
655 while (!Replace
.empty()) {
656 Instruction
*I
= Replace
.pop_back_val();
657 if (I
->getOpcode() == Instruction::FPToUI
||
658 I
->getOpcode() == Instruction::FPToSI
) {
669 class ExpandLargeFpConvertLegacyPass
: public FunctionPass
{
673 ExpandLargeFpConvertLegacyPass() : FunctionPass(ID
) {
674 initializeExpandLargeFpConvertLegacyPassPass(
675 *PassRegistry::getPassRegistry());
678 bool runOnFunction(Function
&F
) override
{
679 auto *TM
= &getAnalysis
<TargetPassConfig
>().getTM
<TargetMachine
>();
680 auto *TLI
= TM
->getSubtargetImpl(F
)->getTargetLowering();
681 return runImpl(F
, *TLI
);
684 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
685 AU
.addRequired
<TargetPassConfig
>();
686 AU
.addPreserved
<AAResultsWrapperPass
>();
687 AU
.addPreserved
<GlobalsAAWrapperPass
>();
692 PreservedAnalyses
ExpandLargeFpConvertPass::run(Function
&F
,
693 FunctionAnalysisManager
&FAM
) {
694 const TargetSubtargetInfo
*STI
= TM
->getSubtargetImpl(F
);
695 return runImpl(F
, *STI
->getTargetLowering()) ? PreservedAnalyses::none()
696 : PreservedAnalyses::all();
699 char ExpandLargeFpConvertLegacyPass::ID
= 0;
700 INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass
, "expand-large-fp-convert",
701 "Expand large fp convert", false, false)
702 INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass
, "expand-large-fp-convert",
703 "Expand large fp convert", false, false)
705 FunctionPass
*llvm::createExpandLargeFpConvertPass() {
706 return new ExpandLargeFpConvertLegacyPass();