llvm/lib/CodeGen/ExpandLargeFpConvert.cpp

   1 //===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9
  10 // This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
  11 // ‘sitofp .. to’ instructions with a bitwidth above a threshold into
  12 // auto-generated functions. This is useful for targets like x86_64 that cannot
  13 // lower fp conversions with more than 128 bits.
  14 //
  15 //===----------------------------------------------------------------------===//
  16
  17 #include "llvm/CodeGen/ExpandLargeFpConvert.h"
  18 #include "llvm/ADT/SmallVector.h"
  19 #include "llvm/ADT/StringExtras.h"
  20 #include "llvm/Analysis/GlobalsModRef.h"
  21 #include "llvm/CodeGen/Passes.h"
  22 #include "llvm/CodeGen/TargetLowering.h"
  23 #include "llvm/CodeGen/TargetPassConfig.h"
  24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  25 #include "llvm/IR/IRBuilder.h"
  26 #include "llvm/IR/InstIterator.h"
  27 #include "llvm/IR/PassManager.h"
  28 #include "llvm/InitializePasses.h"
  29 #include "llvm/Pass.h"
  30 #include "llvm/Support/CommandLine.h"
  31 #include "llvm/Target/TargetMachine.h"
  32
  33 using namespace llvm;
  34
  35 static cl::opt<unsigned>
  36     ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
  37                      cl::init(llvm::IntegerType::MAX_INT_BITS),
  38                      cl::desc("fp convert instructions on integers with "
  39                               "more than <N> bits are expanded."));
  40
  41 /// Generate code to convert a fp number to integer, replacing FPToS(U)I with
  42 /// the generated code. This currently generates code similarly to compiler-rt's
  43 /// implementations.
  44 ///
  45 /// An example IR generated from compiler-rt/fixsfdi.c looks like below:
  46 /// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
  47 /// entry:
  48 ///   %0 = bitcast float %a to i32
  49 ///   %conv.i = zext i32 %0 to i64
  50 ///   %tobool.not = icmp sgt i32 %0, -1
  51 ///   %conv = select i1 %tobool.not, i64 1, i64 -1
  52 ///   %and = lshr i64 %conv.i, 23
  53 ///   %shr = and i64 %and, 255
  54 ///   %and2 = and i64 %conv.i, 8388607
  55 ///   %or = or i64 %and2, 8388608
  56 ///   %cmp = icmp ult i64 %shr, 127
  57 ///   br i1 %cmp, label %cleanup, label %if.end
  58 ///
  59 /// if.end:                                           ; preds = %entry
  60 ///   %sub = add nuw nsw i64 %shr, 4294967169
  61 ///   %conv5 = and i64 %sub, 4294967232
  62 ///   %cmp6.not = icmp eq i64 %conv5, 0
  63 ///   br i1 %cmp6.not, label %if.end12, label %if.then8
  64 ///
  65 /// if.then8:                                         ; preds = %if.end
  66 ///   %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
  67 ///   br label %cleanup
  68 ///
  69 /// if.end12:                                         ; preds = %if.end
  70 ///   %cmp13 = icmp ult i64 %shr, 150
  71 ///   br i1 %cmp13, label %if.then15, label %if.else
  72 ///
  73 /// if.then15:                                        ; preds = %if.end12
  74 ///   %sub16 = sub nuw nsw i64 150, %shr
  75 ///   %shr17 = lshr i64 %or, %sub16
  76 ///   %mul = mul nsw i64 %shr17, %conv
  77 ///   br label %cleanup
  78 ///
  79 /// if.else:                                          ; preds = %if.end12
  80 ///   %sub18 = add nsw i64 %shr, -150
  81 ///   %shl = shl i64 %or, %sub18
  82 ///   %mul19 = mul nsw i64 %shl, %conv
  83 ///   br label %cleanup
  84 ///
  85 /// cleanup:                                          ; preds = %entry, %if.else, %if.then15, %if.then8
  86 ///   %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
  87 ///   ret i64 %retval.0
  88 /// }
  89 ///
  90 /// Replace fp to integer with generated code.
  91 static void expandFPToI(Instruction *FPToI) {
  92   IRBuilder<> Builder(FPToI);
  93   auto *FloatVal = FPToI->getOperand(0);
  94   IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
  95
  96   unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
  97   unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
  98
  99   // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
 100   // to i32 first following a sext/zext to target integer type.
 101   Value *A1 = nullptr;
 102   if (FloatVal->getType()->isHalfTy()) {
 103     if (FPToI->getOpcode() == Instruction::FPToUI) {
 104       Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
 105       A1 = Builder.CreateZExt(A0, IntTy);
 106     } else { // FPToSI
 107       Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
 108       A1 = Builder.CreateSExt(A0, IntTy);
 109     }
 110     FPToI->replaceAllUsesWith(A1);
 111     FPToI->dropAllReferences();
 112     FPToI->eraseFromParent();
 113     return;
 114   }
 115
 116   // fp80 conversion is implemented by fpext to fp128 first then do the
 117   // conversion.
 118   FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
 119   unsigned FloatWidth =
 120       PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
 121   unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
 122   unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
 123   Value *ImplicitBit = Builder.CreateShl(
 124       Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
 125   Value *SignificandMask =
 126       Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
 127   Value *NegOne = Builder.CreateSExt(
 128       ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
 129   Value *NegInf =
 130       Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
 131                         ConstantInt::getSigned(IntTy, BitWidth - 1));
 132
 133   BasicBlock *Entry = Builder.GetInsertBlock();
 134   Function *F = Entry->getParent();
 135   Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
 136   BasicBlock *End =
 137       Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
 138   BasicBlock *IfEnd =
 139       BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
 140   BasicBlock *IfThen5 =
 141       BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
 142   BasicBlock *IfEnd9 =
 143       BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
 144   BasicBlock *IfThen12 =
 145       BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
 146   BasicBlock *IfElse =
 147       BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
 148
 149   Entry->getTerminator()->eraseFromParent();
 150
 151   // entry:
 152   Builder.SetInsertPoint(Entry);
 153   Value *FloatVal0 = FloatVal;
 154   // fp80 conversion is implemented by fpext to fp128 first then do the
 155   // conversion.
 156   if (FloatVal->getType()->isX86_FP80Ty())
 157     FloatVal0 =
 158         Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
 159   Value *ARep0 =
 160       Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
 161   Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
 162   Value *PosOrNeg = Builder.CreateICmpSGT(
 163       ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
 164   Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
 165                                      ConstantInt::getSigned(IntTy, -1));
 166   Value *And =
 167       Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
 168   Value *And2 = Builder.CreateAnd(
 169       And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
 170   Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
 171   Value *Or = Builder.CreateOr(Abs, ImplicitBit);
 172   Value *Cmp =
 173       Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
 174   Builder.CreateCondBr(Cmp, End, IfEnd);
 175
 176   // if.end:
 177   Builder.SetInsertPoint(IfEnd);
 178   Value *Add1 = Builder.CreateAdd(
 179       And2, ConstantInt::getSigned(
 180                 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
 181   Value *Cmp3 = Builder.CreateICmpULT(
 182       Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
 183   Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
 184
 185   // if.then5:
 186   Builder.SetInsertPoint(IfThen5);
 187   Value *PosInf = Builder.CreateXor(NegOne, NegInf);
 188   Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
 189   Builder.CreateBr(End);
 190
 191   // if.end9:
 192   Builder.SetInsertPoint(IfEnd9);
 193   Value *Cmp10 = Builder.CreateICmpULT(
 194       And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
 195   Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
 196
 197   // if.then12:
 198   Builder.SetInsertPoint(IfThen12);
 199   Value *Sub13 = Builder.CreateSub(
 200       Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
 201   Value *Shr14 = Builder.CreateLShr(Or, Sub13);
 202   Value *Mul = Builder.CreateMul(Shr14, Sign);
 203   Builder.CreateBr(End);
 204
 205   // if.else:
 206   Builder.SetInsertPoint(IfElse);
 207   Value *Sub15 = Builder.CreateAdd(
 208       And2, ConstantInt::getSigned(
 209                 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
 210   Value *Shl = Builder.CreateShl(Or, Sub15);
 211   Value *Mul16 = Builder.CreateMul(Shl, Sign);
 212   Builder.CreateBr(End);
 213
 214   // cleanup:
 215   Builder.SetInsertPoint(End, End->begin());
 216   PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
 217
 218   Retval0->addIncoming(Cond8, IfThen5);
 219   Retval0->addIncoming(Mul, IfThen12);
 220   Retval0->addIncoming(Mul16, IfElse);
 221   Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
 222
 223   FPToI->replaceAllUsesWith(Retval0);
 224   FPToI->dropAllReferences();
 225   FPToI->eraseFromParent();
 226 }
 227
 228 /// Generate code to convert a fp number to integer, replacing S(U)IToFP with
 229 /// the generated code. This currently generates code similarly to compiler-rt's
 230 /// implementations. This implementation has an implicit assumption that integer
 231 /// width is larger than fp.
 232 ///
 233 /// An example IR generated from compiler-rt/floatdisf.c looks like below:
 234 /// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
 235 /// entry:
 236 ///   %cmp = icmp eq i64 %a, 0
 237 ///   br i1 %cmp, label %return, label %if.end
 238 ///
 239 /// if.end:                                           ; preds = %entry
 240 ///   %shr = ashr i64 %a, 63
 241 ///   %xor = xor i64 %shr, %a
 242 ///   %sub = sub nsw i64 %xor, %shr
 243 ///   %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
 244 ///   %cast = trunc i64 %0 to i32
 245 ///   %sub1 = sub nuw nsw i32 64, %cast
 246 ///   %sub2 = xor i32 %cast, 63
 247 ///   %cmp3 = icmp ult i32 %cast, 40
 248 ///   br i1 %cmp3, label %if.then4, label %if.else
 249 ///
 250 /// if.then4:                                         ; preds = %if.end
 251 ///   switch i32 %sub1, label %sw.default [
 252 ///     i32 25, label %sw.bb
 253 ///     i32 26, label %sw.epilog
 254 ///   ]
 255 ///
 256 /// sw.bb:                                            ; preds = %if.then4
 257 ///   %shl = shl i64 %sub, 1
 258 ///   br label %sw.epilog
 259 ///
 260 /// sw.default:                                       ; preds = %if.then4
 261 ///   %sub5 = sub nsw i64 38, %0
 262 ///   %sh_prom = and i64 %sub5, 4294967295
 263 ///   %shr6 = lshr i64 %sub, %sh_prom
 264 ///   %shr9 = lshr i64 274877906943, %0
 265 ///   %and = and i64 %shr9, %sub
 266 ///   %cmp10 = icmp ne i64 %and, 0
 267 ///   %conv11 = zext i1 %cmp10 to i64
 268 ///   %or = or i64 %shr6, %conv11
 269 ///   br label %sw.epilog
 270 ///
 271 /// sw.epilog:                                        ; preds = %sw.default, %if.then4, %sw.bb
 272 ///   %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
 273 ///   %1 = lshr i64 %a.addr.0, 2
 274 ///   %2 = and i64 %1, 1
 275 ///   %or16 = or i64 %2, %a.addr.0
 276 ///   %inc = add nsw i64 %or16, 1
 277 ///   %3 = and i64 %inc, 67108864
 278 ///   %tobool.not = icmp eq i64 %3, 0
 279 ///   %spec.select.v = select i1 %tobool.not, i64 2, i64 3
 280 ///   %spec.select = ashr i64 %inc, %spec.select.v
 281 ///   %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
 282 ///   br label %if.end26
 283 ///
 284 /// if.else:                                          ; preds = %if.end
 285 ///   %sub23 = add nuw nsw i64 %0, 4294967256
 286 ///   %sh_prom24 = and i64 %sub23, 4294967295
 287 ///   %shl25 = shl i64 %sub, %sh_prom24
 288 ///   br label %if.end26
 289 ///
 290 /// if.end26:                                         ; preds = %sw.epilog, %if.else
 291 ///   %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
 292 ///   %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
 293 ///   %conv27 = trunc i64 %shr to i32
 294 ///   %and28 = and i32 %conv27, -2147483648
 295 ///   %add = shl nuw nsw i32 %e.0, 23
 296 ///   %shl29 = add nuw nsw i32 %add, 1065353216
 297 ///   %conv31 = trunc i64 %a.addr.1 to i32
 298 ///   %and32 = and i32 %conv31, 8388607
 299 ///   %or30 = or i32 %and32, %and28
 300 ///   %or33 = or i32 %or30, %shl29
 301 ///   %4 = bitcast i32 %or33 to float
 302 ///   br label %return
 303 ///
 304 /// return:                                           ; preds = %entry, %if.end26
 305 ///   %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
 306 ///   ret float %retval.0
 307 /// }
 308 ///
 309 /// Replace integer to fp with generated code.
 310 static void expandIToFP(Instruction *IToFP) {
 311   IRBuilder<> Builder(IToFP);
 312   auto *IntVal = IToFP->getOperand(0);
 313   IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
 314
 315   unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
 316   unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
 317   // fp80 conversion is implemented by conversion tp fp128 first following
 318   // a fptrunc to fp80.
 319   FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
 320   // FIXME: As there is no related builtins added in compliler-rt,
 321   // here currently utilized the fp32 <-> fp16 lib calls to implement.
 322   FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
 323   FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
 324   unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
 325   bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
 326
 327   assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
 328                                   "assumes integer width is larger than fp.");
 329
 330   Value *Temp1 =
 331       Builder.CreateShl(Builder.getIntN(BitWidth, 1),
 332                         Builder.getIntN(BitWidth, FPMantissaWidth + 3));
 333
 334   BasicBlock *Entry = Builder.GetInsertBlock();
 335   Function *F = Entry->getParent();
 336   Entry->setName(Twine(Entry->getName(), "itofp-entry"));
 337   BasicBlock *End =
 338       Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
 339   BasicBlock *IfEnd =
 340       BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
 341   BasicBlock *IfThen4 =
 342       BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
 343   BasicBlock *SwBB =
 344       BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
 345   BasicBlock *SwDefault =
 346       BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
 347   BasicBlock *SwEpilog =
 348       BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
 349   BasicBlock *IfThen20 =
 350       BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
 351   BasicBlock *IfElse =
 352       BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
 353   BasicBlock *IfEnd26 =
 354       BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
 355
 356   Entry->getTerminator()->eraseFromParent();
 357
 358   Function *CTLZ =
 359       Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
 360   ConstantInt *True = Builder.getTrue();
 361
 362   // entry:
 363   Builder.SetInsertPoint(Entry);
 364   Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
 365   Builder.CreateCondBr(Cmp, End, IfEnd);
 366
 367   // if.end:
 368   Builder.SetInsertPoint(IfEnd);
 369   Value *Shr =
 370       Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
 371   Value *Xor = Builder.CreateXor(Shr, IntVal);
 372   Value *Sub = Builder.CreateSub(Xor, Shr);
 373   Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
 374   Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
 375   int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
 376   Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
 377                                   FloatWidth == 128 ? Call : Cast);
 378   Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
 379                                   FloatWidth == 128 ? Call : Cast);
 380   Value *Cmp3 = Builder.CreateICmpSGT(
 381       Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
 382   Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
 383
 384   // if.then4:
 385   Builder.SetInsertPoint(IfThen4);
 386   llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
 387   SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
 388   SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
 389
 390   // sw.bb:
 391   Builder.SetInsertPoint(SwBB);
 392   Value *Shl =
 393       Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
 394   Builder.CreateBr(SwEpilog);
 395
 396   // sw.default:
 397   Builder.SetInsertPoint(SwDefault);
 398   Value *Sub5 = Builder.CreateSub(
 399       Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
 400       FloatWidth == 128 ? Call : Cast);
 401   Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
 402   Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
 403                                    FloatWidth == 128 ? Sub5 : ShProm);
 404   Value *Sub8 =
 405       Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
 406                         Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
 407   Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
 408   Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
 409                                    FloatWidth == 128 ? Sub8 : ShProm9);
 410   Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
 411   Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
 412   Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
 413   Value *Or = Builder.CreateOr(Shr6, Conv11);
 414   Builder.CreateBr(SwEpilog);
 415
 416   // sw.epilog:
 417   Builder.SetInsertPoint(SwEpilog);
 418   PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
 419   AAddr0->addIncoming(Or, SwDefault);
 420   AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
 421   AAddr0->addIncoming(Shl, SwBB);
 422   Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
 423   Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
 424   Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
 425   Value *Conv16 = Builder.CreateZExt(A2, IntTy);
 426   Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
 427   Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
 428   Value *Shr18 = nullptr;
 429   if (IsSigned)
 430     Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
 431   else
 432     Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
 433   Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
 434   Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
 435   Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
 436   Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
 437   Value *ExtractT64 = nullptr;
 438   if (FloatWidth > 80)
 439     ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
 440   else
 441     ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
 442   Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
 443
 444   // if.then20
 445   Builder.SetInsertPoint(IfThen20);
 446   Value *Shr21 = nullptr;
 447   if (IsSigned)
 448     Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
 449   else
 450     Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
 451   Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
 452   Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
 453   Value *ExtractT62 = nullptr;
 454   if (FloatWidth > 80)
 455     ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
 456   else
 457     ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
 458   Builder.CreateBr(IfEnd26);
 459
 460   // if.else:
 461   Builder.SetInsertPoint(IfElse);
 462   Value *Sub24 = Builder.CreateAdd(
 463       FloatWidth == 128 ? Call : Cast,
 464       ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
 465                              -(BitWidth - FPMantissaWidth - 1)));
 466   Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
 467   Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
 468                                    FloatWidth == 128 ? Sub24 : ShProm25);
 469   Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
 470   Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
 471   Value *ExtractT66 = nullptr;
 472   if (FloatWidth > 80)
 473     ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
 474   else
 475     ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
 476   Builder.CreateBr(IfEnd26);
 477
 478   // if.end26:
 479   Builder.SetInsertPoint(IfEnd26);
 480   PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
 481   AAddr1Off0->addIncoming(ExtractT, IfThen20);
 482   AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
 483   AAddr1Off0->addIncoming(ExtractT61, IfElse);
 484   PHINode *AAddr1Off32 = nullptr;
 485   if (FloatWidth > 32) {
 486     AAddr1Off32 =
 487         Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
 488     AAddr1Off32->addIncoming(ExtractT62, IfThen20);
 489     AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
 490     AAddr1Off32->addIncoming(ExtractT66, IfElse);
 491   }
 492   PHINode *E0 = nullptr;
 493   if (FloatWidth <= 80) {
 494     E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
 495     E0->addIncoming(Sub1, IfThen20);
 496     E0->addIncoming(Sub2, SwEpilog);
 497     E0->addIncoming(Sub2, IfElse);
 498   }
 499   Value *And29 = nullptr;
 500   if (FloatWidth > 80) {
 501     Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
 502                                      Builder.getIntN(BitWidth, 63));
 503     And29 = Builder.CreateAnd(Shr, Temp2, "and29");
 504   } else {
 505     Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
 506     And29 = Builder.CreateAnd(
 507         Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
 508   }
 509   unsigned TempMod = FPMantissaWidth % 32;
 510   Value *And34 = nullptr;
 511   Value *Shl30 = nullptr;
 512   if (FloatWidth > 80) {
 513     TempMod += 32;
 514     Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
 515     Shl30 = Builder.CreateAdd(
 516         Add,
 517         Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
 518     And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
 519   } else {
 520     Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
 521     Shl30 = Builder.CreateAdd(
 522         Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
 523     And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
 524                               Builder.getIntN(32, (1 << TempMod) - 1));
 525   }
 526   Value *Or35 = nullptr;
 527   if (FloatWidth > 80) {
 528     Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
 529     Value *Or31 = Builder.CreateOr(And29Trunc, And34);
 530     Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
 531     Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
 532                                      Builder.getIntN(128, FPMantissaWidth));
 533     Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
 534     Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
 535     Or35 = Builder.CreateOr(Or34, A6);
 536   } else {
 537     Value *Or31 = Builder.CreateOr(And34, And29);
 538     Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
 539   }
 540   Value *A4 = nullptr;
 541   if (IToFP->getType()->isDoubleTy()) {
 542     Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
 543     Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
 544     Value *And1 =
 545         Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
 546     Value *Or1 = Builder.CreateOr(Shl1, And1);
 547     A4 = Builder.CreateBitCast(Or1, IToFP->getType());
 548   } else if (IToFP->getType()->isX86_FP80Ty()) {
 549     Value *A40 =
 550         Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
 551     A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
 552   } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
 553     // Deal with "half" situation. This is a workaround since we don't have
 554     // floattihf.c currently as referring.
 555     Value *A40 =
 556         Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
 557     A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
 558   } else // float type
 559     A4 = Builder.CreateBitCast(Or35, IToFP->getType());
 560   Builder.CreateBr(End);
 561
 562   // return:
 563   Builder.SetInsertPoint(End, End->begin());
 564   PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
 565   Retval0->addIncoming(A4, IfEnd26);
 566   Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
 567
 568   IToFP->replaceAllUsesWith(Retval0);
 569   IToFP->dropAllReferences();
 570   IToFP->eraseFromParent();
 571 }
 572
 573 static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
 574   VectorType *VTy = cast<FixedVectorType>(I->getType());
 575
 576   IRBuilder<> Builder(I);
 577
 578   unsigned NumElements = VTy->getElementCount().getFixedValue();
 579   Value *Result = PoisonValue::get(VTy);
 580   for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
 581     Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
 582     Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
 583                                      I->getType()->getScalarType());
 584     Result = Builder.CreateInsertElement(Result, Cast, Idx);
 585     if (isa<Instruction>(Cast))
 586       Replace.push_back(cast<Instruction>(Cast));
 587   }
 588   I->replaceAllUsesWith(Result);
 589   I->dropAllReferences();
 590   I->eraseFromParent();
 591 }
 592
 593 static bool runImpl(Function &F, const TargetLowering &TLI) {
 594   SmallVector<Instruction *, 4> Replace;
 595   SmallVector<Instruction *, 4> ReplaceVector;
 596   bool Modified = false;
 597
 598   unsigned MaxLegalFpConvertBitWidth =
 599       TLI.getMaxLargeFPConvertBitWidthSupported();
 600   if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
 601     MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
 602
 603   if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
 604     return false;
 605
 606   for (auto &I : instructions(F)) {
 607     switch (I.getOpcode()) {
 608     case Instruction::FPToUI:
 609     case Instruction::FPToSI: {
 610       // TODO: This pass doesn't handle scalable vectors.
 611       if (I.getOperand(0)->getType()->isScalableTy())
 612         continue;
 613
 614       auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());
 615       if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
 616         continue;
 617
 618       if (I.getOperand(0)->getType()->isVectorTy())
 619         ReplaceVector.push_back(&I);
 620       else
 621         Replace.push_back(&I);
 622       Modified = true;
 623       break;
 624     }
 625     case Instruction::UIToFP:
 626     case Instruction::SIToFP: {
 627       // TODO: This pass doesn't handle scalable vectors.
 628       if (I.getOperand(0)->getType()->isScalableTy())
 629         continue;
 630
 631       auto *IntTy =
 632           cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
 633       if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
 634         continue;
 635
 636       if (I.getOperand(0)->getType()->isVectorTy())
 637         ReplaceVector.push_back(&I);
 638       else
 639         Replace.push_back(&I);
 640       Modified = true;
 641       break;
 642     }
 643     default:
 644       break;
 645     }
 646   }
 647
 648   while (!ReplaceVector.empty()) {
 649     Instruction *I = ReplaceVector.pop_back_val();
 650     scalarize(I, Replace);
 651   }
 652
 653   if (Replace.empty())
 654     return false;
 655
 656   while (!Replace.empty()) {
 657     Instruction *I = Replace.pop_back_val();
 658     if (I->getOpcode() == Instruction::FPToUI ||
 659         I->getOpcode() == Instruction::FPToSI) {
 660       expandFPToI(I);
 661     } else {
 662       expandIToFP(I);
 663     }
 664   }
 665
 666   return Modified;
 667 }
 668
 669 namespace {
 670 class ExpandLargeFpConvertLegacyPass : public FunctionPass {
 671 public:
 672   static char ID;
 673
 674   ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
 675     initializeExpandLargeFpConvertLegacyPassPass(
 676         *PassRegistry::getPassRegistry());
 677   }
 678
 679   bool runOnFunction(Function &F) override {
 680     auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
 681     auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
 682     return runImpl(F, *TLI);
 683   }
 684
 685   void getAnalysisUsage(AnalysisUsage &AU) const override {
 686     AU.addRequired<TargetPassConfig>();
 687     AU.addPreserved<AAResultsWrapperPass>();
 688     AU.addPreserved<GlobalsAAWrapperPass>();
 689   }
 690 };
 691 } // namespace
 692
 693 PreservedAnalyses ExpandLargeFpConvertPass::run(Function &F,
 694                                                 FunctionAnalysisManager &FAM) {
 695   const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
 696   return runImpl(F, *STI->getTargetLowering()) ? PreservedAnalyses::none()
 697                                                : PreservedAnalyses::all();
 698 }
 699
 700 char ExpandLargeFpConvertLegacyPass::ID = 0;
 701 INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
 702                       "Expand large fp convert", false, false)
 703 INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
 704                     "Expand large fp convert", false, false)
 705
 706 FunctionPass *llvm::createExpandLargeFpConvertPass() {
 707   return new ExpandLargeFpConvertLegacyPass();
 708 }